From dbd0fbc76c77daac08ddd245afdcbade0d506e19 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 29 Jun 2018 22:31:10 +0300 Subject: x86/tsc: Add missing header to tsc_msr.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a missing header otherwise compiler warns about missed prototype: CC arch/x86/kernel/tsc_msr.o arch/x86/kernel/tsc_msr.c:73:15: warning: no previous prototype for ‘cpu_khz_from_msr’ [-Wmissing-prototypes] unsigned long cpu_khz_from_msr(void) ^~~~~~~~~~~~~~~~ Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Pavel Tatashin Link: https://lkml.kernel.org/r/20180629193113.84425-4-andriy.shevchenko@linux.intel.com --- arch/x86/kernel/tsc_msr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 19afdbd7d0a7..5532d1be7687 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -12,6 +12,7 @@ #include #include #include +#include #define MAX_NUM_FREQS 9 -- cgit v1.2.3 From e2ce67b2b34b6e9d77da2f375dba5b525508f7df Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 29 Jun 2018 22:31:08 +0300 Subject: x86/cpu: Introduce INTEL_CPU_FAM*() helper macros These macros are often used by drivers and there exists already a lot of duplication as ICPU() macro across the drivers. Provide a generic x86 macro for users. Note, as Ingo Molnar pointed out this has a hidden issue when a driver needs to preserve const qualifier. Though, it would be addressed separately at some point. Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Pavel Tatashin Link: https://lkml.kernel.org/r/20180629193113.84425-2-andriy.shevchenko@linux.intel.com --- arch/x86/include/asm/intel-family.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index cf090e584202..7ed08a7c3398 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -76,4 +76,17 @@ #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ #define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */ +/* Useful macros */ +#define INTEL_CPU_FAM_ANY(_family, _model, _driver_data) \ +{ \ + .vendor = X86_VENDOR_INTEL, \ + .family = _family, \ + .model = _model, \ + .feature = X86_FEATURE_ANY, \ + .driver_data = (kernel_ulong_t)&_driver_data \ +} + +#define INTEL_CPU_FAM6(_model, _driver_data) \ + INTEL_CPU_FAM_ANY(6, INTEL_FAM6_##_model, _driver_data) + #endif /* _ASM_X86_INTEL_FAMILY_H */ -- cgit v1.2.3 From 397d3ad18dc431456baf8bce96606fa1d18b30b0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 29 Jun 2018 22:31:09 +0300 Subject: x86/tsc: Convert to use x86_match_cpu() and INTEL_CPU_FAM6() Move the code to use recently introduced INTEL_CPU_FAM6() macro and drop custom version of x86_match_cpu() function. No functional change intended. Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Pavel Tatashin Link: https://lkml.kernel.org/r/20180629193113.84425-3-andriy.shevchenko@linux.intel.com --- arch/x86/kernel/tsc_msr.c | 83 ++++++++++++++++++++++++----------------------- 1 file changed, 42 insertions(+), 41 deletions(-) diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 5532d1be7687..1465aaee543a 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -8,9 +8,11 @@ */ #include -#include -#include + #include +#include +#include +#include #include #include @@ -24,44 +26,43 @@ * field msr_plat does. */ struct freq_desc { - u8 x86_family; /* CPU family */ - u8 x86_model; /* model */ u8 msr_plat; /* 1: use MSR_PLATFORM_INFO, 0: MSR_IA32_PERF_STATUS */ u32 freqs[MAX_NUM_FREQS]; }; -static struct freq_desc freq_desc_tables[] = { - /* PNW */ - { 6, 0x27, 0, { 0, 0, 0, 0, 0, 99840, 0, 83200 } }, - /* CLV+ */ - { 6, 0x35, 0, { 0, 133200, 0, 0, 0, 99840, 0, 83200 } }, - /* TNG - Intel Atom processor Z3400 series */ - { 6, 0x4a, 1, { 0, 100000, 133300, 0, 0, 0, 0, 0 } }, - /* VLV2 - Intel Atom processor E3000, Z3600, Z3700 series */ - { 6, 0x37, 1, { 83300, 100000, 133300, 116700, 80000, 0, 0, 0 } }, - /* ANN - Intel Atom processor Z3500 series */ - { 6, 0x5a, 1, { 83300, 100000, 133300, 100000, 0, 0, 0, 0 } }, - /* AMT - Intel Atom processor X7-Z8000 and X5-Z8000 series */ - { 6, 0x4c, 1, { 83300, 100000, 133300, 116700, - 80000, 93300, 90000, 88900, 87500 } }, +static const struct freq_desc freq_desc_pnw = { + 0, { 0, 0, 0, 0, 0, 99840, 0, 83200 } }; -static int match_cpu(u8 family, u8 model) -{ - int i; +static const struct freq_desc freq_desc_clv = { + 0, { 0, 133200, 0, 0, 0, 99840, 0, 83200 } +}; - for (i = 0; i < ARRAY_SIZE(freq_desc_tables); i++) { - if ((family == freq_desc_tables[i].x86_family) && - (model == freq_desc_tables[i].x86_model)) - return i; - } +static const struct freq_desc freq_desc_byt = { + 1, { 83300, 100000, 133300, 116700, 80000, 0, 0, 0 } +}; - return -1; -} +static const struct freq_desc freq_desc_cht = { + 1, { 83300, 100000, 133300, 116700, 80000, 93300, 90000, 88900, 87500 } +}; -/* Map CPU reference clock freq ID(0-7) to CPU reference clock freq(KHz) */ -#define id_to_freq(cpu_index, freq_id) \ - (freq_desc_tables[cpu_index].freqs[freq_id]) +static const struct freq_desc freq_desc_tng = { + 1, { 0, 100000, 133300, 0, 0, 0, 0, 0 } +}; + +static const struct freq_desc freq_desc_ann = { + 1, { 83300, 100000, 133300, 100000, 0, 0, 0, 0 } +}; + +static const struct x86_cpu_id tsc_msr_cpu_ids[] = { + INTEL_CPU_FAM6(ATOM_PENWELL, freq_desc_pnw), + INTEL_CPU_FAM6(ATOM_CLOVERVIEW, freq_desc_clv), + INTEL_CPU_FAM6(ATOM_SILVERMONT1, freq_desc_byt), + INTEL_CPU_FAM6(ATOM_AIRMONT, freq_desc_cht), + INTEL_CPU_FAM6(ATOM_MERRIFIELD, freq_desc_tng), + INTEL_CPU_FAM6(ATOM_MOOREFIELD, freq_desc_ann), + {} +}; /* * MSR-based CPU/TSC frequency discovery for certain CPUs. @@ -71,18 +72,17 @@ static int match_cpu(u8 family, u8 model) */ unsigned long cpu_khz_from_msr(void) { - u32 lo, hi, ratio, freq_id, freq; + u32 lo, hi, ratio, freq; + const struct freq_desc *freq_desc; + const struct x86_cpu_id *id; unsigned long res; - int cpu_index; - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + id = x86_match_cpu(tsc_msr_cpu_ids); + if (!id) return 0; - cpu_index = match_cpu(boot_cpu_data.x86, boot_cpu_data.x86_model); - if (cpu_index < 0) - return 0; - - if (freq_desc_tables[cpu_index].msr_plat) { + freq_desc = (struct freq_desc *)id->driver_data; + if (freq_desc->msr_plat) { rdmsr(MSR_PLATFORM_INFO, lo, hi); ratio = (lo >> 8) & 0xff; } else { @@ -92,8 +92,9 @@ unsigned long cpu_khz_from_msr(void) /* Get FSB FREQ ID */ rdmsr(MSR_FSB_FREQ, lo, hi); - freq_id = lo & 0x7; - freq = id_to_freq(cpu_index, freq_id); + + /* Map CPU reference clock freq ID(0-7) to CPU reference clock freq(KHz) */ + freq = freq_desc->freqs[lo & 0x7]; /* TSC frequency = maximum resolved freq * maximum resolved bus ratio */ res = freq * ratio; -- cgit v1.2.3 From 5067b087cf5b2fa4de00443cdc6a66acb28a4953 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 29 Jun 2018 22:31:11 +0300 Subject: x86/tsc: Use SPDX identifier and update Intel copyright Use SPDX identifier and update year in Intel copyright line. While here, remove file name from the file itself. Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Pavel Tatashin Link: https://lkml.kernel.org/r/20180629193113.84425-5-andriy.shevchenko@linux.intel.com --- arch/x86/kernel/tsc_msr.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 1465aaee543a..f0951c2e9f28 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -1,10 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* - * tsc_msr.c - TSC frequency enumeration via MSR + * TSC frequency enumeration via MSR * - * Copyright (C) 2013 Intel Corporation + * Copyright (C) 2013, 2018 Intel Corporation * Author: Bin Gao - * - * This file is released under the GPLv2. */ #include -- cgit v1.2.3 From d99e5da91b36db5c35ddaf3653b280ee060971da Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 29 Jun 2018 22:31:12 +0300 Subject: x86/platform/intel-mid: Remove custom TSC calibration Since the commit 7da7c1561366 ("x86, tsc: Add static (MSR) TSC calibration on Intel Atom SoCs") introduced a common way for all Intel MID chips to get their TSC frequency via MSRs, there is no need to keep a duplication in each of Intel MID platform code. Thus, remove the custom calibration code for good. Note, there is slight difference in how to get frequency for (reserved?) values in MSRs, i.e. legacy code enforces some defaults while new code just uses 0 in that cases. Suggested-by: Alexander Shishkin Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Pavel Tatashin Cc: Bin Gao Link: https://lkml.kernel.org/r/20180629193113.84425-6-andriy.shevchenko@linux.intel.com --- arch/x86/include/asm/intel-mid.h | 14 ------- arch/x86/kernel/tsc_msr.c | 5 +++ arch/x86/platform/intel-mid/intel-mid.c | 6 --- arch/x86/platform/intel-mid/mfld.c | 36 ---------------- arch/x86/platform/intel-mid/mrfld.c | 74 --------------------------------- 5 files changed, 5 insertions(+), 130 deletions(-) diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index fe04491130ae..376eb8ada62d 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -136,20 +136,6 @@ enum intel_mid_timer_options { extern enum intel_mid_timer_options intel_mid_timer_options; -/* - * Penwell uses spread spectrum clock, so the freq number is not exactly - * the same as reported by MSR based on SDM. - */ -#define FSB_FREQ_83SKU 83200 -#define FSB_FREQ_100SKU 99840 -#define FSB_FREQ_133SKU 133000 - -#define FSB_FREQ_167SKU 167000 -#define FSB_FREQ_200SKU 200000 -#define FSB_FREQ_267SKU 267000 -#define FSB_FREQ_333SKU 333000 -#define FSB_FREQ_400SKU 400000 - /* Bus Select SoC Fuse value */ #define BSEL_SOC_FUSE_MASK 0x7 /* FSB 133MHz */ diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index f0951c2e9f28..27ef714d886c 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -29,6 +29,11 @@ struct freq_desc { u32 freqs[MAX_NUM_FREQS]; }; +/* + * Penwell and Clovertrail use spread spectrum clock, + * so the freq number is not exactly the same as reported + * by MSR based on SDM. + */ static const struct freq_desc freq_desc_pnw = { 0, { 0, 0, 0, 0, 0, 99840, 0, 83200 } }; diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index 2ebdf31d9996..aac15a4018d5 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -82,11 +82,6 @@ static void intel_mid_reboot(void) intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0); } -static unsigned long __init intel_mid_calibrate_tsc(void) -{ - return 0; -} - static void __init intel_mid_setup_bp_timer(void) { apbt_time_init(); @@ -191,7 +186,6 @@ void __init x86_intel_mid_early_setup(void) x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock; - x86_platform.calibrate_tsc = intel_mid_calibrate_tsc; x86_platform.get_nmi_reason = intel_mid_get_nmi_reason; x86_init.pci.arch_init = intel_mid_pci_init; diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c index e42978d4deaf..e66b51f5c206 100644 --- a/arch/x86/platform/intel-mid/mfld.c +++ b/arch/x86/platform/intel-mid/mfld.c @@ -11,48 +11,12 @@ #include -#include #include -#include #include "intel_mid_weak_decls.h" -static unsigned long __init mfld_calibrate_tsc(void) -{ - unsigned long fast_calibrate; - u32 lo, hi, ratio, fsb; - - rdmsr(MSR_IA32_PERF_STATUS, lo, hi); - pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi); - ratio = (hi >> 8) & 0x1f; - pr_debug("ratio is %d\n", ratio); - if (!ratio) { - pr_err("read a zero ratio, should be incorrect!\n"); - pr_err("force tsc ratio to 16 ...\n"); - ratio = 16; - } - rdmsr(MSR_FSB_FREQ, lo, hi); - if ((lo & 0x7) == 0x7) - fsb = FSB_FREQ_83SKU; - else - fsb = FSB_FREQ_100SKU; - fast_calibrate = ratio * fsb; - pr_debug("read penwell tsc %lu khz\n", fast_calibrate); - lapic_timer_frequency = fsb * 1000 / HZ; - - /* - * TSC on Intel Atom SoCs is reliable and of known frequency. - * See tsc_msr.c for details. - */ - setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); - setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); - - return fast_calibrate; -} - static void __init penwell_arch_setup(void) { - x86_platform.calibrate_tsc = mfld_calibrate_tsc; } static struct intel_mid_ops penwell_ops = { diff --git a/arch/x86/platform/intel-mid/mrfld.c b/arch/x86/platform/intel-mid/mrfld.c index ae7bdeb0e507..c5538ec2d62d 100644 --- a/arch/x86/platform/intel-mid/mrfld.c +++ b/arch/x86/platform/intel-mid/mrfld.c @@ -11,86 +11,12 @@ #include -#include #include #include "intel_mid_weak_decls.h" -static unsigned long __init tangier_calibrate_tsc(void) -{ - unsigned long fast_calibrate; - u32 lo, hi, ratio, fsb, bus_freq; - - /* *********************** */ - /* Compute TSC:Ratio * FSB */ - /* *********************** */ - - /* Compute Ratio */ - rdmsr(MSR_PLATFORM_INFO, lo, hi); - pr_debug("IA32 PLATFORM_INFO is 0x%x : %x\n", hi, lo); - - ratio = (lo >> 8) & 0xFF; - pr_debug("ratio is %d\n", ratio); - if (!ratio) { - pr_err("Read a zero ratio, force tsc ratio to 4 ...\n"); - ratio = 4; - } - - /* Compute FSB */ - rdmsr(MSR_FSB_FREQ, lo, hi); - pr_debug("Actual FSB frequency detected by SOC 0x%x : %x\n", - hi, lo); - - bus_freq = lo & 0x7; - pr_debug("bus_freq = 0x%x\n", bus_freq); - - if (bus_freq == 0) - fsb = FSB_FREQ_100SKU; - else if (bus_freq == 1) - fsb = FSB_FREQ_100SKU; - else if (bus_freq == 2) - fsb = FSB_FREQ_133SKU; - else if (bus_freq == 3) - fsb = FSB_FREQ_167SKU; - else if (bus_freq == 4) - fsb = FSB_FREQ_83SKU; - else if (bus_freq == 5) - fsb = FSB_FREQ_400SKU; - else if (bus_freq == 6) - fsb = FSB_FREQ_267SKU; - else if (bus_freq == 7) - fsb = FSB_FREQ_333SKU; - else { - BUG(); - pr_err("Invalid bus_freq! Setting to minimal value!\n"); - fsb = FSB_FREQ_100SKU; - } - - /* TSC = FSB Freq * Resolved HFM Ratio */ - fast_calibrate = ratio * fsb; - pr_debug("calculate tangier tsc %lu KHz\n", fast_calibrate); - - /* ************************************ */ - /* Calculate Local APIC Timer Frequency */ - /* ************************************ */ - lapic_timer_frequency = (fsb * 1000) / HZ; - - pr_debug("Setting lapic_timer_frequency = %d\n", - lapic_timer_frequency); - - /* - * TSC on Intel Atom SoCs is reliable and of known frequency. - * See tsc_msr.c for details. - */ - setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); - setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); - - return fast_calibrate; -} - static void __init tangier_arch_setup(void) { - x86_platform.calibrate_tsc = tangier_calibrate_tsc; x86_platform.legacy.rtc = 1; } -- cgit v1.2.3 From 41afb1dfad4d6af0c716746f6a15f3230482955c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 29 Jun 2018 22:31:13 +0300 Subject: x86/platform/intel-mid: Remove per platform code After custom TSC calibration gone, there is no more reason to have custom platform code for each of Intel MID. Thus, remove it for good. Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Pavel Tatashin Link: https://lkml.kernel.org/r/20180629193113.84425-7-andriy.shevchenko@linux.intel.com --- arch/x86/include/asm/intel-mid.h | 29 ------------------ arch/x86/platform/intel-mid/Makefile | 2 +- arch/x86/platform/intel-mid/intel-mid.c | 17 +---------- arch/x86/platform/intel-mid/intel_mid_weak_decls.h | 18 ------------ arch/x86/platform/intel-mid/mfld.c | 34 ---------------------- arch/x86/platform/intel-mid/mrfld.c | 31 -------------------- 6 files changed, 2 insertions(+), 129 deletions(-) delete mode 100644 arch/x86/platform/intel-mid/intel_mid_weak_decls.h delete mode 100644 arch/x86/platform/intel-mid/mfld.c delete mode 100644 arch/x86/platform/intel-mid/mrfld.c diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 376eb8ada62d..52f815a80539 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -80,35 +80,6 @@ enum intel_mid_cpu_type { extern enum intel_mid_cpu_type __intel_mid_cpu_chip; -/** - * struct intel_mid_ops - Interface between intel-mid & sub archs - * @arch_setup: arch_setup function to re-initialize platform - * structures (x86_init, x86_platform_init) - * - * This structure can be extended if any new interface is required - * between intel-mid & its sub arch files. - */ -struct intel_mid_ops { - void (*arch_setup)(void); -}; - -/* Helper API's for INTEL_MID_OPS_INIT */ -#define DECLARE_INTEL_MID_OPS_INIT(cpuname, cpuid) \ - [cpuid] = get_##cpuname##_ops - -/* Maximum number of CPU ops */ -#define MAX_CPU_OPS(a) (sizeof(a)/sizeof(void *)) - -/* - * For every new cpu addition, a weak get__ops() function needs be - * declared in arch/x86/platform/intel_mid/intel_mid_weak_decls.h. - */ -#define INTEL_MID_OPS_INIT { \ - DECLARE_INTEL_MID_OPS_INIT(penwell, INTEL_MID_CPU_CHIP_PENWELL), \ - DECLARE_INTEL_MID_OPS_INIT(cloverview, INTEL_MID_CPU_CHIP_CLOVERVIEW), \ - DECLARE_INTEL_MID_OPS_INIT(tangier, INTEL_MID_CPU_CHIP_TANGIER) \ -}; - #ifdef CONFIG_X86_INTEL_MID static inline enum intel_mid_cpu_type intel_mid_identify_cpu(void) diff --git a/arch/x86/platform/intel-mid/Makefile b/arch/x86/platform/intel-mid/Makefile index fa021dfab088..5cf886c867c2 100644 --- a/arch/x86/platform/intel-mid/Makefile +++ b/arch/x86/platform/intel-mid/Makefile @@ -1,4 +1,4 @@ -obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o mrfld.o pwr.o +obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o pwr.o # SFI specific code ifdef CONFIG_X86_INTEL_MID diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index aac15a4018d5..56f66eafb94f 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -36,8 +36,6 @@ #include #include -#include "intel_mid_weak_decls.h" - /* * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock, * cmdline option x86_intel_mid_timer can be used to override the configuration @@ -61,10 +59,6 @@ enum intel_mid_timer_options intel_mid_timer_options; -/* intel_mid_ops to store sub arch ops */ -static struct intel_mid_ops *intel_mid_ops; -/* getter function for sub arch ops*/ -static void *(*get_intel_mid_ops[])(void) = INTEL_MID_OPS_INIT; enum intel_mid_cpu_type __intel_mid_cpu_chip; EXPORT_SYMBOL_GPL(__intel_mid_cpu_chip); @@ -128,6 +122,7 @@ static void intel_mid_arch_setup(void) case 0x3C: case 0x4A: __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_TANGIER; + x86_platform.legacy.rtc = 1; break; case 0x27: default: @@ -135,17 +130,7 @@ static void intel_mid_arch_setup(void) break; } - if (__intel_mid_cpu_chip < MAX_CPU_OPS(get_intel_mid_ops)) - intel_mid_ops = get_intel_mid_ops[__intel_mid_cpu_chip](); - else { - intel_mid_ops = get_intel_mid_ops[INTEL_MID_CPU_CHIP_PENWELL](); - pr_info("ARCH: Unknown SoC, assuming Penwell!\n"); - } - out: - if (intel_mid_ops->arch_setup) - intel_mid_ops->arch_setup(); - /* * Intel MID platforms are using explicitly defined regulators. * diff --git a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h deleted file mode 100644 index 3c1c3866d82b..000000000000 --- a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * intel_mid_weak_decls.h: Weak declarations of intel-mid.c - * - * (C) Copyright 2013 Intel Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - - -/* For every CPU addition a new get__ops interface needs - * to be added. - */ -extern void *get_penwell_ops(void); -extern void *get_cloverview_ops(void); -extern void *get_tangier_ops(void); diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c deleted file mode 100644 index e66b51f5c206..000000000000 --- a/arch/x86/platform/intel-mid/mfld.c +++ /dev/null @@ -1,34 +0,0 @@ -/* - * mfld.c: Intel Medfield platform setup code - * - * (C) Copyright 2013 Intel Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - -#include - -#include - -#include "intel_mid_weak_decls.h" - -static void __init penwell_arch_setup(void) -{ -} - -static struct intel_mid_ops penwell_ops = { - .arch_setup = penwell_arch_setup, -}; - -void *get_penwell_ops(void) -{ - return &penwell_ops; -} - -void *get_cloverview_ops(void) -{ - return &penwell_ops; -} diff --git a/arch/x86/platform/intel-mid/mrfld.c b/arch/x86/platform/intel-mid/mrfld.c deleted file mode 100644 index c5538ec2d62d..000000000000 --- a/arch/x86/platform/intel-mid/mrfld.c +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Intel Merrifield platform specific setup code - * - * (C) Copyright 2013 Intel Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - -#include - -#include - -#include "intel_mid_weak_decls.h" - -static void __init tangier_arch_setup(void) -{ - x86_platform.legacy.rtc = 1; -} - -/* tangier arch ops */ -static struct intel_mid_ops tangier_ops = { - .arch_setup = tangier_arch_setup, -}; - -void *get_tangier_ops(void) -{ - return &tangier_ops; -} -- cgit v1.2.3 From 368a540e0232ad446931f5a4e8a5e06f69f21343 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 19 Jul 2018 16:55:20 -0400 Subject: x86/kvmclock: Remove memblock dependency KVM clock is initialized later compared to other hypervisor clocks because it has a dependency on the memblock allocator. Bring it in line with other hypervisors by using memory from the BSS instead of allocating it. The benefits: - Remove ifdef from common code - Earlier availability of the clock - Remove dependency on memblock, and reduce code The downside: - Static allocation of the per cpu data structures sized NR_CPUS * 64byte Will be addressed in follow up patches. [ tglx: Split out from larger series ] Signed-off-by: Pavel Tatashin Signed-off-by: Thomas Gleixner Acked-by: Paolo Bonzini Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: peterz@infradead.org Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Link: https://lkml.kernel.org/r/20180719205545.16512-2-pasha.tatashin@oracle.com --- arch/x86/kernel/kvm.c | 1 + arch/x86/kernel/kvmclock.c | 66 ++++++++-------------------------------------- arch/x86/kernel/setup.c | 4 --- 3 files changed, 12 insertions(+), 59 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 5b2300b818af..c65c232d3ddd 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -628,6 +628,7 @@ const __initconst struct hypervisor_x86 x86_hyper_kvm = { .name = "KVM", .detect = kvm_detect, .type = X86_HYPER_KVM, + .init.init_platform = kvmclock_init, .init.guest_late_init = kvm_guest_init, .init.x2apic_available = kvm_para_available, }; diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 3b8e7c13c614..1f6ac5aaa904 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -23,9 +23,9 @@ #include #include #include -#include #include #include +#include #include #include @@ -44,6 +44,13 @@ static int parse_no_kvmclock(char *arg) } early_param("no-kvmclock", parse_no_kvmclock); +/* Aligned to page sizes to match whats mapped via vsyscalls to userspace */ +#define HV_CLOCK_SIZE (sizeof(struct pvclock_vsyscall_time_info) * NR_CPUS) +#define WALL_CLOCK_SIZE (sizeof(struct pvclock_wall_clock)) + +static u8 hv_clock_mem[PAGE_ALIGN(HV_CLOCK_SIZE)] __aligned(PAGE_SIZE); +static u8 wall_clock_mem[PAGE_ALIGN(WALL_CLOCK_SIZE)] __aligned(PAGE_SIZE); + /* The hypervisor will put information about time periodically here */ static struct pvclock_vsyscall_time_info *hv_clock; static struct pvclock_wall_clock *wall_clock; @@ -245,43 +252,12 @@ static void kvm_shutdown(void) native_machine_shutdown(); } -static phys_addr_t __init kvm_memblock_alloc(phys_addr_t size, - phys_addr_t align) -{ - phys_addr_t mem; - - mem = memblock_alloc(size, align); - if (!mem) - return 0; - - if (sev_active()) { - if (early_set_memory_decrypted((unsigned long)__va(mem), size)) - goto e_free; - } - - return mem; -e_free: - memblock_free(mem, size); - return 0; -} - -static void __init kvm_memblock_free(phys_addr_t addr, phys_addr_t size) -{ - if (sev_active()) - early_set_memory_encrypted((unsigned long)__va(addr), size); - - memblock_free(addr, size); -} - void __init kvmclock_init(void) { struct pvclock_vcpu_time_info *vcpu_time; - unsigned long mem, mem_wall_clock; - int size, cpu, wall_clock_size; + int cpu; u8 flags; - size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); - if (!kvm_para_available()) return; @@ -291,28 +267,11 @@ void __init kvmclock_init(void) } else if (!(kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE))) return; - wall_clock_size = PAGE_ALIGN(sizeof(struct pvclock_wall_clock)); - mem_wall_clock = kvm_memblock_alloc(wall_clock_size, PAGE_SIZE); - if (!mem_wall_clock) - return; - - wall_clock = __va(mem_wall_clock); - memset(wall_clock, 0, wall_clock_size); - - mem = kvm_memblock_alloc(size, PAGE_SIZE); - if (!mem) { - kvm_memblock_free(mem_wall_clock, wall_clock_size); - wall_clock = NULL; - return; - } - - hv_clock = __va(mem); - memset(hv_clock, 0, size); + wall_clock = (struct pvclock_wall_clock *)wall_clock_mem; + hv_clock = (struct pvclock_vsyscall_time_info *)hv_clock_mem; if (kvm_register_clock("primary cpu clock")) { hv_clock = NULL; - kvm_memblock_free(mem, size); - kvm_memblock_free(mem_wall_clock, wall_clock_size); wall_clock = NULL; return; } @@ -357,13 +316,10 @@ int __init kvm_setup_vsyscall_timeinfo(void) int cpu; u8 flags; struct pvclock_vcpu_time_info *vcpu_time; - unsigned int size; if (!hv_clock) return 0; - size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); - cpu = get_cpu(); vcpu_time = &hv_clock[cpu].pvti; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 2f86d883dd95..da1dbd99cb6e 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1197,10 +1197,6 @@ void __init setup_arch(char **cmdline_p) memblock_find_dma_reserve(); -#ifdef CONFIG_KVM_GUEST - kvmclock_init(); -#endif - tsc_early_delay_calibrate(); if (!early_xdbc_setup_hardware()) early_xdbc_register_console(); -- cgit v1.2.3 From 7ef363a39514ed8a6f2333fbae1875ac0953715a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 19 Jul 2018 16:55:21 -0400 Subject: x86/kvmclock: Remove page size requirement from wall_clock There is no requirement for wall_clock data to be page aligned or page sized. Signed-off-by: Thomas Gleixner Signed-off-by: Pavel Tatashin Acked-by: Paolo Bonzini Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: peterz@infradead.org Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Link: https://lkml.kernel.org/r/20180719205545.16512-3-pasha.tatashin@oracle.com --- arch/x86/kernel/kvmclock.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 1f6ac5aaa904..a995d7d7164c 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -46,14 +46,12 @@ early_param("no-kvmclock", parse_no_kvmclock); /* Aligned to page sizes to match whats mapped via vsyscalls to userspace */ #define HV_CLOCK_SIZE (sizeof(struct pvclock_vsyscall_time_info) * NR_CPUS) -#define WALL_CLOCK_SIZE (sizeof(struct pvclock_wall_clock)) static u8 hv_clock_mem[PAGE_ALIGN(HV_CLOCK_SIZE)] __aligned(PAGE_SIZE); -static u8 wall_clock_mem[PAGE_ALIGN(WALL_CLOCK_SIZE)] __aligned(PAGE_SIZE); /* The hypervisor will put information about time periodically here */ static struct pvclock_vsyscall_time_info *hv_clock; -static struct pvclock_wall_clock *wall_clock; +static struct pvclock_wall_clock wall_clock; /* * The wallclock is the time of day when we booted. Since then, some time may @@ -66,15 +64,15 @@ static void kvm_get_wallclock(struct timespec64 *now) int low, high; int cpu; - low = (int)slow_virt_to_phys(wall_clock); - high = ((u64)slow_virt_to_phys(wall_clock) >> 32); + low = (int)slow_virt_to_phys(&wall_clock); + high = ((u64)slow_virt_to_phys(&wall_clock) >> 32); native_write_msr(msr_kvm_wall_clock, low, high); cpu = get_cpu(); vcpu_time = &hv_clock[cpu].pvti; - pvclock_read_wallclock(wall_clock, vcpu_time, now); + pvclock_read_wallclock(&wall_clock, vcpu_time, now); put_cpu(); } @@ -267,12 +265,10 @@ void __init kvmclock_init(void) } else if (!(kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE))) return; - wall_clock = (struct pvclock_wall_clock *)wall_clock_mem; hv_clock = (struct pvclock_vsyscall_time_info *)hv_clock_mem; if (kvm_register_clock("primary cpu clock")) { hv_clock = NULL; - wall_clock = NULL; return; } -- cgit v1.2.3 From 7a5ddc8fe0ea9518cd7fb6a929cac7d864c6f300 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 19 Jul 2018 16:55:22 -0400 Subject: x86/kvmclock: Decrapify kvm_register_clock() The return value is pointless because the wrmsr cannot fail if KVM_FEATURE_CLOCKSOURCE or KVM_FEATURE_CLOCKSOURCE2 are set. kvm_register_clock() is only called locally so wants to be static. Signed-off-by: Thomas Gleixner Signed-off-by: Pavel Tatashin Acked-by: Paolo Bonzini Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: peterz@infradead.org Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Link: https://lkml.kernel.org/r/20180719205545.16512-4-pasha.tatashin@oracle.com --- arch/x86/include/asm/kvm_para.h | 1 - arch/x86/kernel/kvmclock.c | 33 ++++++++++----------------------- 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 3aea2658323a..4c723632c036 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -7,7 +7,6 @@ #include extern void kvmclock_init(void); -extern int kvm_register_clock(char *txt); #ifdef CONFIG_KVM_GUEST bool kvm_check_and_clear_guest_paused(void); diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index a995d7d7164c..f0a0aef5e9fa 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -187,23 +187,19 @@ struct clocksource kvm_clock = { }; EXPORT_SYMBOL_GPL(kvm_clock); -int kvm_register_clock(char *txt) +static void kvm_register_clock(char *txt) { - int cpu = smp_processor_id(); - int low, high, ret; struct pvclock_vcpu_time_info *src; + int cpu = smp_processor_id(); + u64 pa; if (!hv_clock) - return 0; + return; src = &hv_clock[cpu].pvti; - low = (int)slow_virt_to_phys(src) | 1; - high = ((u64)slow_virt_to_phys(src) >> 32); - ret = native_write_msr_safe(msr_kvm_system_time, low, high); - printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", - cpu, high, low, txt); - - return ret; + pa = slow_virt_to_phys(src) | 0x01ULL; + wrmsrl(msr_kvm_system_time, pa); + pr_info("kvm-clock: cpu %d, msr %llx, %s\n", cpu, pa, txt); } static void kvm_save_sched_clock_state(void) @@ -218,11 +214,7 @@ static void kvm_restore_sched_clock_state(void) #ifdef CONFIG_X86_LOCAL_APIC static void kvm_setup_secondary_clock(void) { - /* - * Now that the first cpu already had this clocksource initialized, - * we shouldn't fail. - */ - WARN_ON(kvm_register_clock("secondary cpu clock")); + kvm_register_clock("secondary cpu clock"); } #endif @@ -265,16 +257,11 @@ void __init kvmclock_init(void) } else if (!(kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE))) return; - hv_clock = (struct pvclock_vsyscall_time_info *)hv_clock_mem; - - if (kvm_register_clock("primary cpu clock")) { - hv_clock = NULL; - return; - } - printk(KERN_INFO "kvm-clock: Using msrs %x and %x", msr_kvm_system_time, msr_kvm_wall_clock); + hv_clock = (struct pvclock_vsyscall_time_info *)hv_clock_mem; + kvm_register_clock("primary cpu clock"); pvclock_set_pvti_cpu0_va(hv_clock); if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) -- cgit v1.2.3 From 146c394d0c3c8e88df433a179c2b0b85fd8cf247 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 19 Jul 2018 16:55:23 -0400 Subject: x86/kvmclock: Cleanup the code - Cleanup the mrs write for wall clock. The type casts to (int) are sloppy because the wrmsr parameters are u32 and aside of that wrmsrl() already provides the high/low split for free. - Remove the pointless get_cpu()/put_cpu() dance from various functions. Either they are called during early init where CPU is guaranteed to be 0 or they are already called from non preemptible context where smp_processor_id() can be used safely - Simplify the convoluted check for kvmclock in the init function. - Mark the parameter parsing function __init. No point in keeping it around. - Convert to pr_info() Signed-off-by: Thomas Gleixner Signed-off-by: Pavel Tatashin Acked-by: Paolo Bonzini Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: peterz@infradead.org Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Link: https://lkml.kernel.org/r/20180719205545.16512-5-pasha.tatashin@oracle.com --- arch/x86/kernel/kvmclock.c | 72 ++++++++++++++-------------------------------- 1 file changed, 22 insertions(+), 50 deletions(-) diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index f0a0aef5e9fa..4afb03e49a4f 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -37,7 +37,7 @@ static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; static u64 kvm_sched_clock_offset; -static int parse_no_kvmclock(char *arg) +static int __init parse_no_kvmclock(char *arg) { kvmclock = 0; return 0; @@ -61,13 +61,9 @@ static struct pvclock_wall_clock wall_clock; static void kvm_get_wallclock(struct timespec64 *now) { struct pvclock_vcpu_time_info *vcpu_time; - int low, high; int cpu; - low = (int)slow_virt_to_phys(&wall_clock); - high = ((u64)slow_virt_to_phys(&wall_clock) >> 32); - - native_write_msr(msr_kvm_wall_clock, low, high); + wrmsrl(msr_kvm_wall_clock, slow_virt_to_phys(&wall_clock)); cpu = get_cpu(); @@ -117,11 +113,11 @@ static inline void kvm_sched_clock_init(bool stable) kvm_sched_clock_offset = kvm_clock_read(); pv_time_ops.sched_clock = kvm_sched_clock_read; - printk(KERN_INFO "kvm-clock: using sched offset of %llu cycles\n", - kvm_sched_clock_offset); + pr_info("kvm-clock: using sched offset of %llu cycles", + kvm_sched_clock_offset); BUILD_BUG_ON(sizeof(kvm_sched_clock_offset) > - sizeof(((struct pvclock_vcpu_time_info *)NULL)->system_time)); + sizeof(((struct pvclock_vcpu_time_info *)NULL)->system_time)); } /* @@ -135,16 +131,8 @@ static inline void kvm_sched_clock_init(bool stable) */ static unsigned long kvm_get_tsc_khz(void) { - struct pvclock_vcpu_time_info *src; - int cpu; - unsigned long tsc_khz; - - cpu = get_cpu(); - src = &hv_clock[cpu].pvti; - tsc_khz = pvclock_tsc_khz(src); - put_cpu(); setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); - return tsc_khz; + return pvclock_tsc_khz(&hv_clock[0].pvti); } static void kvm_get_preset_lpj(void) @@ -161,29 +149,27 @@ static void kvm_get_preset_lpj(void) bool kvm_check_and_clear_guest_paused(void) { - bool ret = false; struct pvclock_vcpu_time_info *src; - int cpu = smp_processor_id(); + bool ret = false; if (!hv_clock) return ret; - src = &hv_clock[cpu].pvti; + src = &hv_clock[smp_processor_id()].pvti; if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) { src->flags &= ~PVCLOCK_GUEST_STOPPED; pvclock_touch_watchdogs(); ret = true; } - return ret; } struct clocksource kvm_clock = { - .name = "kvm-clock", - .read = kvm_clock_get_cycles, - .rating = 400, - .mask = CLOCKSOURCE_MASK(64), - .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .name = "kvm-clock", + .read = kvm_clock_get_cycles, + .rating = 400, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; EXPORT_SYMBOL_GPL(kvm_clock); @@ -199,7 +185,7 @@ static void kvm_register_clock(char *txt) src = &hv_clock[cpu].pvti; pa = slow_virt_to_phys(src) | 0x01ULL; wrmsrl(msr_kvm_system_time, pa); - pr_info("kvm-clock: cpu %d, msr %llx, %s\n", cpu, pa, txt); + pr_info("kvm-clock: cpu %d, msr %llx, %s", cpu, pa, txt); } static void kvm_save_sched_clock_state(void) @@ -244,20 +230,19 @@ static void kvm_shutdown(void) void __init kvmclock_init(void) { - struct pvclock_vcpu_time_info *vcpu_time; - int cpu; u8 flags; - if (!kvm_para_available()) + if (!kvm_para_available() || !kvmclock) return; - if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2)) { + if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2)) { msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW; msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW; - } else if (!(kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE))) + } else if (!kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { return; + } - printk(KERN_INFO "kvm-clock: Using msrs %x and %x", + pr_info("kvm-clock: Using msrs %x and %x", msr_kvm_system_time, msr_kvm_wall_clock); hv_clock = (struct pvclock_vsyscall_time_info *)hv_clock_mem; @@ -267,20 +252,15 @@ void __init kvmclock_init(void) if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); - cpu = get_cpu(); - vcpu_time = &hv_clock[cpu].pvti; - flags = pvclock_read_flags(vcpu_time); - + flags = pvclock_read_flags(&hv_clock[0].pvti); kvm_sched_clock_init(flags & PVCLOCK_TSC_STABLE_BIT); - put_cpu(); x86_platform.calibrate_tsc = kvm_get_tsc_khz; x86_platform.calibrate_cpu = kvm_get_tsc_khz; x86_platform.get_wallclock = kvm_get_wallclock; x86_platform.set_wallclock = kvm_set_wallclock; #ifdef CONFIG_X86_LOCAL_APIC - x86_cpuinit.early_percpu_clock_init = - kvm_setup_secondary_clock; + x86_cpuinit.early_percpu_clock_init = kvm_setup_secondary_clock; #endif x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state; @@ -296,20 +276,12 @@ void __init kvmclock_init(void) int __init kvm_setup_vsyscall_timeinfo(void) { #ifdef CONFIG_X86_64 - int cpu; u8 flags; - struct pvclock_vcpu_time_info *vcpu_time; if (!hv_clock) return 0; - cpu = get_cpu(); - - vcpu_time = &hv_clock[cpu].pvti; - flags = pvclock_read_flags(vcpu_time); - - put_cpu(); - + flags = pvclock_read_flags(&hv_clock[0].pvti); if (!(flags & PVCLOCK_TSC_STABLE_BIT)) return 1; -- cgit v1.2.3 From 42f8df935efefba51d0c5321b1325436523e3377 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 19 Jul 2018 16:55:24 -0400 Subject: x86/kvmclock: Mark variables __initdata and __ro_after_init The kvmclock parameter is init data and the other variables are not modified after init. Signed-off-by: Thomas Gleixner Signed-off-by: Pavel Tatashin Acked-by: Paolo Bonzini Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: peterz@infradead.org Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Link: https://lkml.kernel.org/r/20180719205545.16512-6-pasha.tatashin@oracle.com --- arch/x86/kernel/kvmclock.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 4afb03e49a4f..78aec160f5e0 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -32,10 +32,10 @@ #include #include -static int kvmclock __ro_after_init = 1; -static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; -static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK; -static u64 kvm_sched_clock_offset; +static int kvmclock __initdata = 1; +static int msr_kvm_system_time __ro_after_init = MSR_KVM_SYSTEM_TIME; +static int msr_kvm_wall_clock __ro_after_init = MSR_KVM_WALL_CLOCK; +static u64 kvm_sched_clock_offset __ro_after_init; static int __init parse_no_kvmclock(char *arg) { @@ -50,7 +50,7 @@ early_param("no-kvmclock", parse_no_kvmclock); static u8 hv_clock_mem[PAGE_ALIGN(HV_CLOCK_SIZE)] __aligned(PAGE_SIZE); /* The hypervisor will put information about time periodically here */ -static struct pvclock_vsyscall_time_info *hv_clock; +static struct pvclock_vsyscall_time_info *hv_clock __ro_after_init; static struct pvclock_wall_clock wall_clock; /* -- cgit v1.2.3 From e499a9b6dc488aff7f284bee51936f510ab7ad15 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 19 Jul 2018 16:55:25 -0400 Subject: x86/kvmclock: Move kvmclock vsyscall param and init to kvmclock There is no point to have this in the kvm code itself and call it from there. This can be called from an initcall and the parameter is cleared when the hypervisor is not KVM. Signed-off-by: Thomas Gleixner Signed-off-by: Pavel Tatashin Acked-by: Paolo Bonzini Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: peterz@infradead.org Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Link: https://lkml.kernel.org/r/20180719205545.16512-7-pasha.tatashin@oracle.com --- arch/x86/include/asm/kvm_guest.h | 7 ------- arch/x86/kernel/kvm.c | 13 ------------ arch/x86/kernel/kvmclock.c | 44 ++++++++++++++++++++++++---------------- 3 files changed, 27 insertions(+), 37 deletions(-) delete mode 100644 arch/x86/include/asm/kvm_guest.h diff --git a/arch/x86/include/asm/kvm_guest.h b/arch/x86/include/asm/kvm_guest.h deleted file mode 100644 index 46185263d9c2..000000000000 --- a/arch/x86/include/asm/kvm_guest.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_KVM_GUEST_H -#define _ASM_X86_KVM_GUEST_H - -int kvm_setup_vsyscall_timeinfo(void); - -#endif /* _ASM_X86_KVM_GUEST_H */ diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index c65c232d3ddd..a560750cc76f 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -45,7 +45,6 @@ #include #include #include -#include static int kvmapf = 1; @@ -66,15 +65,6 @@ static int __init parse_no_stealacc(char *arg) early_param("no-steal-acc", parse_no_stealacc); -static int kvmclock_vsyscall = 1; -static int __init parse_no_kvmclock_vsyscall(char *arg) -{ - kvmclock_vsyscall = 0; - return 0; -} - -early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall); - static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); static DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64); static int has_steal_clock = 0; @@ -560,9 +550,6 @@ static void __init kvm_guest_init(void) if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) apic_set_eoi_write(kvm_guest_apic_eoi_write); - if (kvmclock_vsyscall) - kvm_setup_vsyscall_timeinfo(); - #ifdef CONFIG_SMP smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus; smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 78aec160f5e0..7d690d2238f8 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -27,12 +27,14 @@ #include #include +#include #include #include #include #include static int kvmclock __initdata = 1; +static int kvmclock_vsyscall __initdata = 1; static int msr_kvm_system_time __ro_after_init = MSR_KVM_SYSTEM_TIME; static int msr_kvm_wall_clock __ro_after_init = MSR_KVM_WALL_CLOCK; static u64 kvm_sched_clock_offset __ro_after_init; @@ -44,6 +46,13 @@ static int __init parse_no_kvmclock(char *arg) } early_param("no-kvmclock", parse_no_kvmclock); +static int __init parse_no_kvmclock_vsyscall(char *arg) +{ + kvmclock_vsyscall = 0; + return 0; +} +early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall); + /* Aligned to page sizes to match whats mapped via vsyscalls to userspace */ #define HV_CLOCK_SIZE (sizeof(struct pvclock_vsyscall_time_info) * NR_CPUS) @@ -228,6 +237,24 @@ static void kvm_shutdown(void) native_machine_shutdown(); } +static int __init kvm_setup_vsyscall_timeinfo(void) +{ +#ifdef CONFIG_X86_64 + u8 flags; + + if (!hv_clock || !kvmclock_vsyscall) + return 0; + + flags = pvclock_read_flags(&hv_clock[0].pvti); + if (!(flags & PVCLOCK_TSC_STABLE_BIT)) + return 1; + + kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK; +#endif + return 0; +} +early_initcall(kvm_setup_vsyscall_timeinfo); + void __init kvmclock_init(void) { u8 flags; @@ -272,20 +299,3 @@ void __init kvmclock_init(void) clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); pv_info.name = "KVM"; } - -int __init kvm_setup_vsyscall_timeinfo(void) -{ -#ifdef CONFIG_X86_64 - u8 flags; - - if (!hv_clock) - return 0; - - flags = pvclock_read_flags(&hv_clock[0].pvti); - if (!(flags & PVCLOCK_TSC_STABLE_BIT)) - return 1; - - kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK; -#endif - return 0; -} -- cgit v1.2.3 From 95a3d4454bb1cf5bfd666c27fdd2dc188e17c14d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 19 Jul 2018 16:55:26 -0400 Subject: x86/kvmclock: Switch kvmclock data to a PER_CPU variable The previous removal of the memblock dependency from kvmclock introduced a static data array sized 64bytes * CONFIG_NR_CPUS. That's wasteful on large systems when kvmclock is not used. Replace it with: - A static page sized array of pvclock data. It's page sized because the pvclock data of the boot cpu is mapped into the VDSO so otherwise random other data would be exposed to the vDSO - A PER_CPU variable of pvclock data pointers. This is used to access the pcvlock data storage on each CPU. The setup is done in two stages: - Early boot stores the pointer to the static page for the boot CPU in the per cpu data. - In the preparatory stage of CPU hotplug assign either an element of the static array (when the CPU number is in that range) or allocate memory and initialize the per cpu pointer. Signed-off-by: Thomas Gleixner Signed-off-by: Pavel Tatashin Acked-by: Paolo Bonzini Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: peterz@infradead.org Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Link: https://lkml.kernel.org/r/20180719205545.16512-8-pasha.tatashin@oracle.com --- arch/x86/kernel/kvmclock.c | 99 +++++++++++++++++++++++++++++----------------- 1 file changed, 62 insertions(+), 37 deletions(-) diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 7d690d2238f8..91b94c0ae4e3 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -55,12 +56,23 @@ early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall); /* Aligned to page sizes to match whats mapped via vsyscalls to userspace */ #define HV_CLOCK_SIZE (sizeof(struct pvclock_vsyscall_time_info) * NR_CPUS) +#define HVC_BOOT_ARRAY_SIZE \ + (PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info)) -static u8 hv_clock_mem[PAGE_ALIGN(HV_CLOCK_SIZE)] __aligned(PAGE_SIZE); - -/* The hypervisor will put information about time periodically here */ -static struct pvclock_vsyscall_time_info *hv_clock __ro_after_init; +static struct pvclock_vsyscall_time_info + hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __aligned(PAGE_SIZE); static struct pvclock_wall_clock wall_clock; +static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu); + +static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void) +{ + return &this_cpu_read(hv_clock_per_cpu)->pvti; +} + +static inline struct pvclock_vsyscall_time_info *this_cpu_hvclock(void) +{ + return this_cpu_read(hv_clock_per_cpu); +} /* * The wallclock is the time of day when we booted. Since then, some time may @@ -69,17 +81,10 @@ static struct pvclock_wall_clock wall_clock; */ static void kvm_get_wallclock(struct timespec64 *now) { - struct pvclock_vcpu_time_info *vcpu_time; - int cpu; - wrmsrl(msr_kvm_wall_clock, slow_virt_to_phys(&wall_clock)); - - cpu = get_cpu(); - - vcpu_time = &hv_clock[cpu].pvti; - pvclock_read_wallclock(&wall_clock, vcpu_time, now); - - put_cpu(); + preempt_disable(); + pvclock_read_wallclock(&wall_clock, this_cpu_pvti(), now); + preempt_enable(); } static int kvm_set_wallclock(const struct timespec64 *now) @@ -89,14 +94,10 @@ static int kvm_set_wallclock(const struct timespec64 *now) static u64 kvm_clock_read(void) { - struct pvclock_vcpu_time_info *src; u64 ret; - int cpu; preempt_disable_notrace(); - cpu = smp_processor_id(); - src = &hv_clock[cpu].pvti; - ret = pvclock_clocksource_read(src); + ret = pvclock_clocksource_read(this_cpu_pvti()); preempt_enable_notrace(); return ret; } @@ -141,7 +142,7 @@ static inline void kvm_sched_clock_init(bool stable) static unsigned long kvm_get_tsc_khz(void) { setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); - return pvclock_tsc_khz(&hv_clock[0].pvti); + return pvclock_tsc_khz(this_cpu_pvti()); } static void kvm_get_preset_lpj(void) @@ -158,15 +159,14 @@ static void kvm_get_preset_lpj(void) bool kvm_check_and_clear_guest_paused(void) { - struct pvclock_vcpu_time_info *src; + struct pvclock_vsyscall_time_info *src = this_cpu_hvclock(); bool ret = false; - if (!hv_clock) + if (!src) return ret; - src = &hv_clock[smp_processor_id()].pvti; - if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) { - src->flags &= ~PVCLOCK_GUEST_STOPPED; + if ((src->pvti.flags & PVCLOCK_GUEST_STOPPED) != 0) { + src->pvti.flags &= ~PVCLOCK_GUEST_STOPPED; pvclock_touch_watchdogs(); ret = true; } @@ -184,17 +184,15 @@ EXPORT_SYMBOL_GPL(kvm_clock); static void kvm_register_clock(char *txt) { - struct pvclock_vcpu_time_info *src; - int cpu = smp_processor_id(); + struct pvclock_vsyscall_time_info *src = this_cpu_hvclock(); u64 pa; - if (!hv_clock) + if (!src) return; - src = &hv_clock[cpu].pvti; - pa = slow_virt_to_phys(src) | 0x01ULL; + pa = slow_virt_to_phys(&src->pvti) | 0x01ULL; wrmsrl(msr_kvm_system_time, pa); - pr_info("kvm-clock: cpu %d, msr %llx, %s", cpu, pa, txt); + pr_info("kvm-clock: cpu %d, msr %llx, %s", smp_processor_id(), pa, txt); } static void kvm_save_sched_clock_state(void) @@ -242,12 +240,12 @@ static int __init kvm_setup_vsyscall_timeinfo(void) #ifdef CONFIG_X86_64 u8 flags; - if (!hv_clock || !kvmclock_vsyscall) + if (!per_cpu(hv_clock_per_cpu, 0) || !kvmclock_vsyscall) return 0; - flags = pvclock_read_flags(&hv_clock[0].pvti); + flags = pvclock_read_flags(&hv_clock_boot[0].pvti); if (!(flags & PVCLOCK_TSC_STABLE_BIT)) - return 1; + return 0; kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK; #endif @@ -255,6 +253,28 @@ static int __init kvm_setup_vsyscall_timeinfo(void) } early_initcall(kvm_setup_vsyscall_timeinfo); +static int kvmclock_setup_percpu(unsigned int cpu) +{ + struct pvclock_vsyscall_time_info *p = per_cpu(hv_clock_per_cpu, cpu); + + /* + * The per cpu area setup replicates CPU0 data to all cpu + * pointers. So carefully check. CPU0 has been set up in init + * already. + */ + if (!cpu || (p && p != per_cpu(hv_clock_per_cpu, 0))) + return 0; + + /* Use the static page for the first CPUs, allocate otherwise */ + if (cpu < HVC_BOOT_ARRAY_SIZE) + p = &hv_clock_boot[cpu]; + else + p = kzalloc(sizeof(*p), GFP_KERNEL); + + per_cpu(hv_clock_per_cpu, cpu) = p; + return p ? 0 : -ENOMEM; +} + void __init kvmclock_init(void) { u8 flags; @@ -269,17 +289,22 @@ void __init kvmclock_init(void) return; } + if (cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "kvmclock:setup_percpu", + kvmclock_setup_percpu, NULL) < 0) { + return; + } + pr_info("kvm-clock: Using msrs %x and %x", msr_kvm_system_time, msr_kvm_wall_clock); - hv_clock = (struct pvclock_vsyscall_time_info *)hv_clock_mem; + this_cpu_write(hv_clock_per_cpu, &hv_clock_boot[0]); kvm_register_clock("primary cpu clock"); - pvclock_set_pvti_cpu0_va(hv_clock); + pvclock_set_pvti_cpu0_va(hv_clock_boot); if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); - flags = pvclock_read_flags(&hv_clock[0].pvti); + flags = pvclock_read_flags(&hv_clock_boot[0].pvti); kvm_sched_clock_init(flags & PVCLOCK_TSC_STABLE_BIT); x86_platform.calibrate_tsc = kvm_get_tsc_khz; -- cgit v1.2.3 From 6fffacb30349e0903602d664f7ab6fc87e85162e Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 19 Jul 2018 16:55:27 -0400 Subject: x86/alternatives, jumplabel: Use text_poke_early() before mm_init() It supposed to be safe to modify static branches after jump_label_init(). But, because static key modifying code eventually calls text_poke() it can end up accessing a struct page which has not been initialized yet. Here is how to quickly reproduce the problem. Insert code like this into init/main.c: | +static DEFINE_STATIC_KEY_FALSE(__test); | asmlinkage __visible void __init start_kernel(void) | { | char *command_line; |@@ -587,6 +609,10 @@ asmlinkage __visible void __init start_kernel(void) | vfs_caches_init_early(); | sort_main_extable(); | trap_init(); |+ { |+ static_branch_enable(&__test); |+ WARN_ON(!static_branch_likely(&__test)); |+ } | mm_init(); The following warnings show-up: WARNING: CPU: 0 PID: 0 at arch/x86/kernel/alternative.c:701 text_poke+0x20d/0x230 RIP: 0010:text_poke+0x20d/0x230 Call Trace: ? text_poke_bp+0x50/0xda ? arch_jump_label_transform+0x89/0xe0 ? __jump_label_update+0x78/0xb0 ? static_key_enable_cpuslocked+0x4d/0x80 ? static_key_enable+0x11/0x20 ? start_kernel+0x23e/0x4c8 ? secondary_startup_64+0xa5/0xb0 ---[ end trace abdc99c031b8a90a ]--- If the code above is moved after mm_init(), no warning is shown, as struct pages are initialized during handover from memblock. Use text_poke_early() in static branching until early boot IRQs are enabled and from there switch to text_poke. Also, ensure text_poke() is never invoked when unitialized memory access may happen by using adding a !after_bootmem assertion. Signed-off-by: Pavel Tatashin Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: peterz@infradead.org Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180719205545.16512-9-pasha.tatashin@oracle.com --- arch/x86/include/asm/text-patching.h | 1 + arch/x86/kernel/alternative.c | 7 +++++++ arch/x86/kernel/jump_label.c | 11 +++++++---- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 2ecd34e2d46c..e85ff65c43c3 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -37,5 +37,6 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len); extern void *text_poke(void *addr, const void *opcode, size_t len); extern int poke_int3_handler(struct pt_regs *regs); extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); +extern int after_bootmem; #endif /* _ASM_X86_TEXT_PATCHING_H */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index a481763a3776..014f214da581 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -668,6 +668,7 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode, local_irq_save(flags); memcpy(addr, opcode, len); local_irq_restore(flags); + sync_core(); /* Could also do a CLFLUSH here to speed up CPU recovery; but that causes hangs on some VIA CPUs. */ return addr; @@ -693,6 +694,12 @@ void *text_poke(void *addr, const void *opcode, size_t len) struct page *pages[2]; int i; + /* + * While boot memory allocator is runnig we cannot use struct + * pages as they are not yet initialized. + */ + BUG_ON(!after_bootmem); + if (!core_kernel_text((unsigned long)addr)) { pages[0] = vmalloc_to_page(addr); pages[1] = vmalloc_to_page(addr + PAGE_SIZE); diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index e56c95be2808..eeea935e9bb5 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -37,15 +37,18 @@ static void bug_at(unsigned char *ip, int line) BUG(); } -static void __jump_label_transform(struct jump_entry *entry, - enum jump_label_type type, - void *(*poker)(void *, const void *, size_t), - int init) +static void __ref __jump_label_transform(struct jump_entry *entry, + enum jump_label_type type, + void *(*poker)(void *, const void *, size_t), + int init) { union jump_code_union code; const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP }; const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5]; + if (early_boot_irqs_disabled) + poker = text_poke_early; + if (type == JUMP_LABEL_JMP) { if (init) { /* -- cgit v1.2.3 From 8990cac6e5ea7fa57607736019fe8dca961b998f Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 19 Jul 2018 16:55:28 -0400 Subject: x86/jump_label: Initialize static branching early Static branching is useful to runtime patch branches that are used in hot path, but are infrequently changed. The x86 clock framework is one example that uses static branches to setup the best clock during boot and never changes it again. It is desired to enable the TSC based sched clock early to allow fine grained boot time analysis early on. That requires the static branching functionality to be functional early as well. Static branching requires patching nop instructions, thus, arch_init_ideal_nops() must be called prior to jump_label_init(). Do all the necessary steps to call arch_init_ideal_nops() right after early_cpu_init(), which also allows to insert a call to jump_label_init() right after that. jump_label_init() will be called again from the generic init code, but the code is protected against reinitialization already. [ tglx: Massaged changelog ] Suggested-by: Peter Zijlstra Signed-off-by: Pavel Tatashin Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180719205545.16512-10-pasha.tatashin@oracle.com --- arch/x86/kernel/cpu/amd.c | 13 ++++++++----- arch/x86/kernel/cpu/common.c | 38 ++++++++++++++++++++------------------ arch/x86/kernel/setup.c | 4 ++-- 3 files changed, 30 insertions(+), 25 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 38915fbfae73..b732438c1a1e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -232,8 +232,6 @@ static void init_amd_k7(struct cpuinfo_x86 *c) } } - set_cpu_cap(c, X86_FEATURE_K7); - /* calling is from identify_secondary_cpu() ? */ if (!c->cpu_index) return; @@ -617,6 +615,14 @@ static void early_init_amd(struct cpuinfo_x86 *c) early_init_amd_mc(c); +#ifdef CONFIG_X86_32 + if (c->x86 == 6) + set_cpu_cap(c, X86_FEATURE_K7); +#endif + + if (c->x86 >= 0xf) + set_cpu_cap(c, X86_FEATURE_K8); + rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); /* @@ -863,9 +869,6 @@ static void init_amd(struct cpuinfo_x86 *c) init_amd_cacheinfo(c); - if (c->x86 >= 0xf) - set_cpu_cap(c, X86_FEATURE_K8); - if (cpu_has(c, X86_FEATURE_XMM2)) { unsigned long long val; int ret; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index eb4cb3efd20e..71281ac43b15 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1015,6 +1015,24 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); } +/* + * The NOPL instruction is supposed to exist on all CPUs of family >= 6; + * unfortunately, that's not true in practice because of early VIA + * chips and (more importantly) broken virtualizers that are not easy + * to detect. In the latter case it doesn't even *fail* reliably, so + * probing for it doesn't even work. Disable it completely on 32-bit + * unless we can find a reliable way to detect all the broken cases. + * Enable it explicitly on 64-bit for non-constant inputs of cpu_has(). + */ +static void detect_nopl(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_32 + clear_cpu_cap(c, X86_FEATURE_NOPL); +#else + set_cpu_cap(c, X86_FEATURE_NOPL); +#endif +} + /* * Do minimum CPU detection early. * Fields really needed: vendor, cpuid_level, family, model, mask, @@ -1089,6 +1107,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) */ if (!pgtable_l5_enabled()) setup_clear_cpu_cap(X86_FEATURE_LA57); + + detect_nopl(c); } void __init early_cpu_init(void) @@ -1124,24 +1144,6 @@ void __init early_cpu_init(void) early_identify_cpu(&boot_cpu_data); } -/* - * The NOPL instruction is supposed to exist on all CPUs of family >= 6; - * unfortunately, that's not true in practice because of early VIA - * chips and (more importantly) broken virtualizers that are not easy - * to detect. In the latter case it doesn't even *fail* reliably, so - * probing for it doesn't even work. Disable it completely on 32-bit - * unless we can find a reliable way to detect all the broken cases. - * Enable it explicitly on 64-bit for non-constant inputs of cpu_has(). - */ -static void detect_nopl(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_32 - clear_cpu_cap(c, X86_FEATURE_NOPL); -#else - set_cpu_cap(c, X86_FEATURE_NOPL); -#endif -} - static void detect_null_seg_behavior(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index da1dbd99cb6e..7490de925a81 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -866,6 +866,8 @@ void __init setup_arch(char **cmdline_p) idt_setup_early_traps(); early_cpu_init(); + arch_init_ideal_nops(); + jump_label_init(); early_ioremap_init(); setup_olpc_ofw_pgd(); @@ -1268,8 +1270,6 @@ void __init setup_arch(char **cmdline_p) mcheck_init(); - arch_init_ideal_nops(); - register_refined_jiffies(CLOCK_TICK_RATE); #ifdef CONFIG_EFI -- cgit v1.2.3 From 9b3661cd7e5400689ed168a7275e75af333177e6 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 19 Jul 2018 16:55:29 -0400 Subject: x86/CPU: Call detect_nopl() only on the BSP Make it use the setup_* variants and have it be called only on the BSP and drop the call in generic_identify() - X86_FEATURE_NOPL will be replicated to the APs through the forced caps. Helps to keep the mess at a manageable level. Signed-off-by: Borislav Petkov Signed-off-by: Pavel Tatashin Signed-off-by: Thomas Gleixner Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: peterz@infradead.org Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180719205545.16512-11-pasha.tatashin@oracle.com --- arch/x86/kernel/cpu/common.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 71281ac43b15..46408a8cdf62 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1024,12 +1024,12 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * unless we can find a reliable way to detect all the broken cases. * Enable it explicitly on 64-bit for non-constant inputs of cpu_has(). */ -static void detect_nopl(struct cpuinfo_x86 *c) +static void detect_nopl(void) { #ifdef CONFIG_X86_32 - clear_cpu_cap(c, X86_FEATURE_NOPL); + setup_clear_cpu_cap(X86_FEATURE_NOPL); #else - set_cpu_cap(c, X86_FEATURE_NOPL); + setup_force_cpu_cap(X86_FEATURE_NOPL); #endif } @@ -1108,7 +1108,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) if (!pgtable_l5_enabled()) setup_clear_cpu_cap(X86_FEATURE_LA57); - detect_nopl(c); + detect_nopl(); } void __init early_cpu_init(void) @@ -1206,8 +1206,6 @@ static void generic_identify(struct cpuinfo_x86 *c) get_model_name(c); /* Default name */ - detect_nopl(c); - detect_null_seg_behavior(c); /* -- cgit v1.2.3 From fe9af81e524e8a86bdd59c0cc0d9e2b0ccaf840f Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 19 Jul 2018 16:55:30 -0400 Subject: x86/tsc: Redefine notsc to behave as tsc=unstable Currently, the notsc kernel parameter disables the use of the TSC by sched_clock(). However, this parameter does not prevent the kernel from accessing tsc in other places. The only rationale to boot with notsc is to avoid timing discrepancies on multi-socket systems where TSC are not properly synchronized, and thus exclude TSC from being used for time keeping. But that prevents using TSC as sched_clock() as well, which is not necessary as the core sched_clock() implementation can handle non synchronized TSC based sched clocks just fine. However, there is another method to solve the above problem: booting with tsc=unstable parameter. This parameter allows sched_clock() to use TSC and just excludes it from timekeeping. So there is no real reason to keep notsc, but for compatibility reasons the parameter has to stay. Make it behave like 'tsc=unstable' instead. [ tglx: Massaged changelog ] Signed-off-by: Pavel Tatashin Signed-off-by: Thomas Gleixner Reviewed-by: Dou Liyang Reviewed-by: Thomas Gleixner Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: peterz@infradead.org Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180719205545.16512-12-pasha.tatashin@oracle.com --- Documentation/admin-guide/kernel-parameters.txt | 2 -- Documentation/x86/x86_64/boot-options.txt | 4 +--- arch/x86/kernel/tsc.c | 18 +++--------------- 3 files changed, 4 insertions(+), 20 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 533ff5c68970..5aed30cd0350 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2835,8 +2835,6 @@ nosync [HW,M68K] Disables sync negotiation for all devices. - notsc [BUGS=X86-32] Disable Time Stamp Counter - nowatchdog [KNL] Disable both lockup detectors, i.e. soft-lockup and NMI watchdog (hard-lockup). diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt index 8d109ef67ab6..66114ab4f9fe 100644 --- a/Documentation/x86/x86_64/boot-options.txt +++ b/Documentation/x86/x86_64/boot-options.txt @@ -92,9 +92,7 @@ APICs Timing notsc - Don't use the CPU time stamp counter to read the wall time. - This can be used to work around timing problems on multiprocessor systems - with not properly synchronized CPUs. + Deprecated, use tsc=unstable instead. nohpet Don't use the HPET timer. diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 74392d9d51e0..186395041725 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -38,11 +38,6 @@ EXPORT_SYMBOL(tsc_khz); */ static int __read_mostly tsc_unstable; -/* native_sched_clock() is called before tsc_init(), so - we must start with the TSC soft disabled to prevent - erroneous rdtsc usage on !boot_cpu_has(X86_FEATURE_TSC) processors */ -static int __read_mostly tsc_disabled = -1; - static DEFINE_STATIC_KEY_FALSE(__use_tsc); int tsc_clocksource_reliable; @@ -248,8 +243,7 @@ EXPORT_SYMBOL_GPL(check_tsc_unstable); #ifdef CONFIG_X86_TSC int __init notsc_setup(char *str) { - pr_warn("Kernel compiled with CONFIG_X86_TSC, cannot disable TSC completely\n"); - tsc_disabled = 1; + mark_tsc_unstable("boot parameter notsc"); return 1; } #else @@ -1307,7 +1301,7 @@ unreg: static int __init init_tsc_clocksource(void) { - if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz) + if (!boot_cpu_has(X86_FEATURE_TSC) || !tsc_khz) return 0; if (tsc_unstable) @@ -1414,12 +1408,6 @@ void __init tsc_init(void) set_cyc2ns_scale(tsc_khz, cpu, cyc); } - if (tsc_disabled > 0) - return; - - /* now allow native_sched_clock() to use rdtsc */ - - tsc_disabled = 0; static_branch_enable(&__use_tsc); if (!no_sched_irq_time) @@ -1455,7 +1443,7 @@ unsigned long calibrate_delay_is_known(void) int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC); const struct cpumask *mask = topology_core_cpumask(cpu); - if (tsc_disabled || !constant_tsc || !mask) + if (!constant_tsc || !mask) return 0; sibling = cpumask_any_but(mask, cpu); -- cgit v1.2.3 From 7b25b9cb0dad8395b5cf5a02196d0e88ccda67d5 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 19 Jul 2018 16:55:31 -0400 Subject: x86/xen/time: Initialize pv xen time in init_hypervisor_platform() In every hypervisor except for xen pv time ops are initialized in init_hypervisor_platform(). Xen PV domains initialize time ops in x86_init.paging.pagetable_init(), by calling xen_setup_shared_info() which is a poor design, as time is needed prior to memory allocator. xen_setup_shared_info() is called from two places: during boot, and after suspend. Split the content of xen_setup_shared_info() into three places: 1. add the clock relavent data into new xen pv init_platform vector, and set clock ops in there. 2. move xen_setup_vcpu_info_placement() to new xen_pv_guest_late_init() call. 3. Re-initializing parts of shared info copy to xen_pv_post_suspend() to be symmetric to xen_pv_pre_suspend Signed-off-by: Pavel Tatashin Signed-off-by: Thomas Gleixner Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: peterz@infradead.org Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180719205545.16512-13-pasha.tatashin@oracle.com --- arch/x86/xen/enlighten_pv.c | 51 +++++++++++++++++++++------------------------ arch/x86/xen/mmu_pv.c | 6 ++---- arch/x86/xen/suspend_pv.c | 5 +++-- arch/x86/xen/time.c | 7 +++---- arch/x86/xen/xen-ops.h | 6 ++---- 5 files changed, 34 insertions(+), 41 deletions(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 439a94bf89ad..105a57d73701 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -119,6 +119,27 @@ static void __init xen_banner(void) version >> 16, version & 0xffff, extra.extraversion, xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); } + +static void __init xen_pv_init_platform(void) +{ + set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info); + HYPERVISOR_shared_info = (void *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); + + /* xen clock uses per-cpu vcpu_info, need to init it for boot cpu */ + xen_vcpu_info_reset(0); + + /* pvclock is in shared info area */ + xen_init_time_ops(); +} + +static void __init xen_pv_guest_late_init(void) +{ +#ifndef CONFIG_SMP + /* Setup shared vcpu info for non-smp configurations */ + xen_setup_vcpu_info_placement(); +#endif +} + /* Check if running on Xen version (major, minor) or later */ bool xen_running_on_version_or_later(unsigned int major, unsigned int minor) @@ -947,34 +968,8 @@ static void xen_write_msr(unsigned int msr, unsigned low, unsigned high) xen_write_msr_safe(msr, low, high); } -void xen_setup_shared_info(void) -{ - set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info); - - HYPERVISOR_shared_info = - (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); - - xen_setup_mfn_list_list(); - - if (system_state == SYSTEM_BOOTING) { -#ifndef CONFIG_SMP - /* - * In UP this is as good a place as any to set up shared info. - * Limit this to boot only, at restore vcpu setup is done via - * xen_vcpu_restore(). - */ - xen_setup_vcpu_info_placement(); -#endif - /* - * Now that shared info is set up we can start using routines - * that point to pvclock area. - */ - xen_init_time_ops(); - } -} - /* This is called once we have the cpu_possible_mask */ -void __ref xen_setup_vcpu_info_placement(void) +void __init xen_setup_vcpu_info_placement(void) { int cpu; @@ -1228,6 +1223,8 @@ asmlinkage __visible void __init xen_start_kernel(void) x86_init.irqs.intr_mode_init = x86_init_noop; x86_init.oem.arch_setup = xen_arch_setup; x86_init.oem.banner = xen_banner; + x86_init.hyper.init_platform = xen_pv_init_platform; + x86_init.hyper.guest_late_init = xen_pv_guest_late_init; /* * Set up some pagetable state before starting to set any ptes. diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 2c30cabfda90..52206ad81e4b 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1230,8 +1230,7 @@ static void __init xen_pagetable_p2m_free(void) * We roundup to the PMD, which means that if anybody at this stage is * using the __ka address of xen_start_info or * xen_start_info->shared_info they are in going to crash. Fortunatly - * we have already revectored in xen_setup_kernel_pagetable and in - * xen_setup_shared_info. + * we have already revectored in xen_setup_kernel_pagetable. */ size = roundup(size, PMD_SIZE); @@ -1292,8 +1291,7 @@ static void __init xen_pagetable_init(void) /* Remap memory freed due to conflicts with E820 map */ xen_remap_memory(); - - xen_setup_shared_info(); + xen_setup_mfn_list_list(); } static void xen_write_cr2(unsigned long cr2) { diff --git a/arch/x86/xen/suspend_pv.c b/arch/x86/xen/suspend_pv.c index a2e0f110af56..8303b58c79a9 100644 --- a/arch/x86/xen/suspend_pv.c +++ b/arch/x86/xen/suspend_pv.c @@ -27,8 +27,9 @@ void xen_pv_pre_suspend(void) void xen_pv_post_suspend(int suspend_cancelled) { xen_build_mfn_list_list(); - - xen_setup_shared_info(); + set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info); + HYPERVISOR_shared_info = (void *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); + xen_setup_mfn_list_list(); if (suspend_cancelled) { xen_start_info->store_mfn = diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index e0f1bcf01d63..53bb7a8d10b5 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -40,7 +40,7 @@ static unsigned long xen_tsc_khz(void) return pvclock_tsc_khz(info); } -u64 xen_clocksource_read(void) +static u64 xen_clocksource_read(void) { struct pvclock_vcpu_time_info *src; u64 ret; @@ -503,7 +503,7 @@ static void __init xen_time_init(void) pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); } -void __ref xen_init_time_ops(void) +void __init xen_init_time_ops(void) { pv_time_ops = xen_time_ops; @@ -542,8 +542,7 @@ void __init xen_hvm_init_time_ops(void) return; if (!xen_feature(XENFEAT_hvm_safe_pvclock)) { - printk(KERN_INFO "Xen doesn't support pvclock on HVM," - "disable pv timer\n"); + pr_info("Xen doesn't support pvclock on HVM, disable pv timer"); return; } diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 3b34745d0a52..e78684597f57 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -31,7 +31,6 @@ extern struct shared_info xen_dummy_shared_info; extern struct shared_info *HYPERVISOR_shared_info; void xen_setup_mfn_list_list(void); -void xen_setup_shared_info(void); void xen_build_mfn_list_list(void); void xen_setup_machphys_mapping(void); void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); @@ -68,12 +67,11 @@ void xen_init_irq_ops(void); void xen_setup_timer(int cpu); void xen_setup_runstate_info(int cpu); void xen_teardown_timer(int cpu); -u64 xen_clocksource_read(void); void xen_setup_cpu_clockevents(void); void xen_save_time_memory_area(void); void xen_restore_time_memory_area(void); -void __ref xen_init_time_ops(void); -void __init xen_hvm_init_time_ops(void); +void xen_init_time_ops(void); +void xen_hvm_init_time_ops(void); irqreturn_t xen_debug_interrupt(int irq, void *dev_id); -- cgit v1.2.3 From 38669ba205d178d2d38bfd194a196d65a44d5af2 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 19 Jul 2018 16:55:32 -0400 Subject: x86/xen/time: Output xen sched_clock time from 0 It is expected for sched_clock() to output data from 0, when system boots. Add an offset xen_sched_clock_offset (similarly how it is done in other hypervisors i.e. kvm_sched_clock_offset) to count sched_clock() from 0, when time is first initialized. Signed-off-by: Pavel Tatashin Signed-off-by: Thomas Gleixner Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: peterz@infradead.org Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180719205545.16512-14-pasha.tatashin@oracle.com --- arch/x86/xen/time.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 53bb7a8d10b5..c84f1e039d84 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -31,6 +31,8 @@ /* Xen may fire a timer up to this many ns early */ #define TIMER_SLOP 100000 +static u64 xen_sched_clock_offset __read_mostly; + /* Get the TSC speed from Xen */ static unsigned long xen_tsc_khz(void) { @@ -57,6 +59,11 @@ static u64 xen_clocksource_get_cycles(struct clocksource *cs) return xen_clocksource_read(); } +static u64 xen_sched_clock(void) +{ + return xen_clocksource_read() - xen_sched_clock_offset; +} + static void xen_read_wallclock(struct timespec64 *ts) { struct shared_info *s = HYPERVISOR_shared_info; @@ -367,7 +374,7 @@ void xen_timer_resume(void) } static const struct pv_time_ops xen_time_ops __initconst = { - .sched_clock = xen_clocksource_read, + .sched_clock = xen_sched_clock, .steal_clock = xen_steal_clock, }; @@ -505,6 +512,7 @@ static void __init xen_time_init(void) void __init xen_init_time_ops(void) { + xen_sched_clock_offset = xen_clocksource_read(); pv_time_ops = xen_time_ops; x86_init.timers.timer_init = xen_time_init; @@ -546,6 +554,7 @@ void __init xen_hvm_init_time_ops(void) return; } + xen_sched_clock_offset = xen_clocksource_read(); pv_time_ops = xen_time_ops; x86_init.timers.setup_percpu_clockev = xen_time_init; x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents; -- cgit v1.2.3 From be2e0e4257678408b0ab00ea9e743b9094e393e8 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 19 Jul 2018 16:55:33 -0400 Subject: s390/time: Add read_persistent_wall_and_boot_offset() read_persistent_wall_and_boot_offset() will replace read_boot_clock64() because on some architectures it is more convenient to read both sources as one may depend on the other. For s390, implementation is the same as read_boot_clock64() but also calling and returning value of read_persistent_clock64() Signed-off-by: Pavel Tatashin Signed-off-by: Thomas Gleixner Reviewed-by: Martin Schwidefsky Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: peterz@infradead.org Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180719205545.16512-15-pasha.tatashin@oracle.com --- arch/s390/kernel/time.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index cf561160ea88..d1f5447d5687 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -221,6 +221,24 @@ void read_persistent_clock64(struct timespec64 *ts) ext_to_timespec64(clk, ts); } +void __init read_persistent_wall_and_boot_offset(struct timespec64 *wall_time, + struct timespec64 *boot_offset) +{ + unsigned char clk[STORE_CLOCK_EXT_SIZE]; + struct timespec64 boot_time; + __u64 delta; + + delta = initial_leap_seconds + TOD_UNIX_EPOCH; + memcpy(clk, tod_clock_base, STORE_CLOCK_EXT_SIZE); + *(__u64 *)&clk[1] -= delta; + if (*(__u64 *)&clk[1] > delta) + clk[0]--; + ext_to_timespec64(clk, &boot_time); + + read_persistent_clock64(wall_time); + *boot_offset = timespec64_sub(*wall_time, boot_time); +} + void read_boot_clock64(struct timespec64 *ts) { unsigned char clk[STORE_CLOCK_EXT_SIZE]; -- cgit v1.2.3 From 3eca993740b8eb40f514b90b1877a4dbcf0a6710 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 19 Jul 2018 16:55:34 -0400 Subject: timekeeping: Replace read_boot_clock64() with read_persistent_wall_and_boot_offset() If architecture does not support exact boot time, it is challenging to estimate boot time without having a reference to the current persistent clock value. Yet, it cannot read the persistent clock time again, because this may lead to math discrepancies with the caller of read_boot_clock64() who have read the persistent clock at a different time. This is why it is better to provide two values simultaneously: the persistent clock value, and the boot time. Replace read_boot_clock64() with: read_persistent_wall_and_boot_offset(wall_time, boot_offset) Where wall_time is returned by read_persistent_clock() And boot_offset is wall_time - boot time, which defaults to 0. Signed-off-by: Pavel Tatashin Signed-off-by: Thomas Gleixner Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: peterz@infradead.org Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180719205545.16512-16-pasha.tatashin@oracle.com --- include/linux/timekeeping.h | 3 ++- kernel/time/timekeeping.c | 59 +++++++++++++++++++++++---------------------- 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 86bc2026efce..686bc27acef0 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -243,7 +243,8 @@ extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot); extern int persistent_clock_is_local; extern void read_persistent_clock64(struct timespec64 *ts); -extern void read_boot_clock64(struct timespec64 *ts); +void read_persistent_clock_and_boot_offset(struct timespec64 *wall_clock, + struct timespec64 *boot_offset); extern int update_persistent_clock64(struct timespec64 now); /* diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 4786df904c22..cb738f825c12 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -1496,18 +1497,20 @@ void __weak read_persistent_clock64(struct timespec64 *ts64) } /** - * read_boot_clock64 - Return time of the system start. + * read_persistent_wall_and_boot_offset - Read persistent clock, and also offset + * from the boot. * * Weak dummy function for arches that do not yet support it. - * Function to read the exact time the system has been started. - * Returns a timespec64 with tv_sec=0 and tv_nsec=0 if unsupported. - * - * XXX - Do be sure to remove it once all arches implement it. + * wall_time - current time as returned by persistent clock + * boot_offset - offset that is defined as wall_time - boot_time + * default to 0. */ -void __weak read_boot_clock64(struct timespec64 *ts) +void __weak __init +read_persistent_wall_and_boot_offset(struct timespec64 *wall_time, + struct timespec64 *boot_offset) { - ts->tv_sec = 0; - ts->tv_nsec = 0; + read_persistent_clock64(wall_time); + *boot_offset = (struct timespec64){0}; } /* Flag for if timekeeping_resume() has injected sleeptime */ @@ -1521,28 +1524,29 @@ static bool persistent_clock_exists; */ void __init timekeeping_init(void) { + struct timespec64 wall_time, boot_offset, wall_to_mono; struct timekeeper *tk = &tk_core.timekeeper; struct clocksource *clock; unsigned long flags; - struct timespec64 now, boot, tmp; - - read_persistent_clock64(&now); - if (!timespec64_valid_strict(&now)) { - pr_warn("WARNING: Persistent clock returned invalid value!\n" - " Check your CMOS/BIOS settings.\n"); - now.tv_sec = 0; - now.tv_nsec = 0; - } else if (now.tv_sec || now.tv_nsec) - persistent_clock_exists = true; - read_boot_clock64(&boot); - if (!timespec64_valid_strict(&boot)) { - pr_warn("WARNING: Boot clock returned invalid value!\n" - " Check your CMOS/BIOS settings.\n"); - boot.tv_sec = 0; - boot.tv_nsec = 0; + read_persistent_wall_and_boot_offset(&wall_time, &boot_offset); + if (timespec64_valid_strict(&wall_time) && + timespec64_to_ns(&wall_time) > 0) { + persistent_clock_exists = true; + } else { + pr_warn("Persistent clock returned invalid value"); + wall_time = (struct timespec64){0}; } + if (timespec64_compare(&wall_time, &boot_offset) < 0) + boot_offset = (struct timespec64){0}; + + /* + * We want set wall_to_mono, so the following is true: + * wall time + wall_to_mono = boot time + */ + wall_to_mono = timespec64_sub(boot_offset, wall_time); + raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&tk_core.seq); ntp_init(); @@ -1552,13 +1556,10 @@ void __init timekeeping_init(void) clock->enable(clock); tk_setup_internals(tk, clock); - tk_set_xtime(tk, &now); + tk_set_xtime(tk, &wall_time); tk->raw_sec = 0; - if (boot.tv_sec == 0 && boot.tv_nsec == 0) - boot = tk_xtime(tk); - set_normalized_timespec64(&tmp, -boot.tv_sec, -boot.tv_nsec); - tk_set_wall_to_mono(tk, tmp); + tk_set_wall_to_mono(tk, wall_to_mono); timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); -- cgit v1.2.3 From 4b1b7f8054896cee25669f6cea7cb6dd17f508f7 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 19 Jul 2018 16:55:35 -0400 Subject: timekeeping: Default boot time offset to local_clock() read_persistent_wall_and_boot_offset() is called during boot to read both the persistent clock and also return the offset between the boot time and the value of persistent clock. Change the default boot_offset from zero to local_clock() so architectures, that do not have a dedicated boot_clock but have early sched_clock(), such as SPARCv9, x86, and possibly more will benefit from this change by getting a better and more consistent estimate of the boot time without need for an arch specific implementation. Signed-off-by: Pavel Tatashin Signed-off-by: Thomas Gleixner Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: peterz@infradead.org Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180719205545.16512-17-pasha.tatashin@oracle.com --- kernel/time/timekeeping.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index cb738f825c12..30d7f64ffc87 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1503,14 +1503,17 @@ void __weak read_persistent_clock64(struct timespec64 *ts64) * Weak dummy function for arches that do not yet support it. * wall_time - current time as returned by persistent clock * boot_offset - offset that is defined as wall_time - boot_time - * default to 0. + * The default function calculates offset based on the current value of + * local_clock(). This way architectures that support sched_clock() but don't + * support dedicated boot time clock will provide the best estimate of the + * boot time. */ void __weak __init read_persistent_wall_and_boot_offset(struct timespec64 *wall_time, struct timespec64 *boot_offset) { read_persistent_clock64(wall_time); - *boot_offset = (struct timespec64){0}; + *boot_offset = ns_to_timespec64(local_clock()); } /* Flag for if timekeeping_resume() has injected sleeptime */ -- cgit v1.2.3 From 00067a6db2e95f3b9d9a017b3be3c715d54cc0de Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 19 Jul 2018 16:55:36 -0400 Subject: s390/time: Remove read_boot_clock64() read_boot_clock64() was replaced by read_persistent_wall_and_boot_offset() so remove it. Signed-off-by: Pavel Tatashin Signed-off-by: Thomas Gleixner Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: peterz@infradead.org Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180719205545.16512-18-pasha.tatashin@oracle.com --- arch/s390/kernel/time.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index d1f5447d5687..e8766beee5ad 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -239,19 +239,6 @@ void __init read_persistent_wall_and_boot_offset(struct timespec64 *wall_time, *boot_offset = timespec64_sub(*wall_time, boot_time); } -void read_boot_clock64(struct timespec64 *ts) -{ - unsigned char clk[STORE_CLOCK_EXT_SIZE]; - __u64 delta; - - delta = initial_leap_seconds + TOD_UNIX_EPOCH; - memcpy(clk, tod_clock_base, 16); - *(__u64 *) &clk[1] -= delta; - if (*(__u64 *) &clk[1] > delta) - clk[0]--; - ext_to_timespec64(clk, ts); -} - static u64 read_tod_clock(struct clocksource *cs) { unsigned long long now, adj; -- cgit v1.2.3 From 227e3958a780499b3ec41c36d4752ac4f4962874 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 19 Jul 2018 16:55:37 -0400 Subject: ARM/time: Remove read_boot_clock64() read_boot_clock64() is deleted, and replaced with read_persistent_wall_and_boot_offset(). The default implementation of read_persistent_wall_and_boot_offset() provides a better fallback than the current stubs for read_boot_clock64() that arm has with no users, so remove the old code. Signed-off-by: Pavel Tatashin Signed-off-by: Thomas Gleixner Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: peterz@infradead.org Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180719205545.16512-19-pasha.tatashin@oracle.com --- arch/arm/include/asm/mach/time.h | 3 +-- arch/arm/kernel/time.c | 15 ++------------- arch/arm/plat-omap/counter_32k.c | 2 +- drivers/clocksource/tegra20_timer.c | 2 +- 4 files changed, 5 insertions(+), 17 deletions(-) diff --git a/arch/arm/include/asm/mach/time.h b/arch/arm/include/asm/mach/time.h index 0f79e4dec7f9..4ac3a019a46f 100644 --- a/arch/arm/include/asm/mach/time.h +++ b/arch/arm/include/asm/mach/time.h @@ -13,7 +13,6 @@ extern void timer_tick(void); typedef void (*clock_access_fn)(struct timespec64 *); -extern int register_persistent_clock(clock_access_fn read_boot, - clock_access_fn read_persistent); +extern int register_persistent_clock(clock_access_fn read_persistent); #endif diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index cf2701cb0de8..078b259ead4e 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -83,29 +83,18 @@ static void dummy_clock_access(struct timespec64 *ts) } static clock_access_fn __read_persistent_clock = dummy_clock_access; -static clock_access_fn __read_boot_clock = dummy_clock_access; void read_persistent_clock64(struct timespec64 *ts) { __read_persistent_clock(ts); } -void read_boot_clock64(struct timespec64 *ts) -{ - __read_boot_clock(ts); -} - -int __init register_persistent_clock(clock_access_fn read_boot, - clock_access_fn read_persistent) +int __init register_persistent_clock(clock_access_fn read_persistent) { /* Only allow the clockaccess functions to be registered once */ - if (__read_persistent_clock == dummy_clock_access && - __read_boot_clock == dummy_clock_access) { - if (read_boot) - __read_boot_clock = read_boot; + if (__read_persistent_clock == dummy_clock_access) { if (read_persistent) __read_persistent_clock = read_persistent; - return 0; } diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c index 2438b96004c1..fcc5bfec8bd1 100644 --- a/arch/arm/plat-omap/counter_32k.c +++ b/arch/arm/plat-omap/counter_32k.c @@ -110,7 +110,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase) } sched_clock_register(omap_32k_read_sched_clock, 32, 32768); - register_persistent_clock(NULL, omap_read_persistent_clock64); + register_persistent_clock(omap_read_persistent_clock64); pr_info("OMAP clocksource: 32k_counter at 32768 Hz\n"); return 0; diff --git a/drivers/clocksource/tegra20_timer.c b/drivers/clocksource/tegra20_timer.c index c337a8100a7b..2242a36fc5b0 100644 --- a/drivers/clocksource/tegra20_timer.c +++ b/drivers/clocksource/tegra20_timer.c @@ -259,6 +259,6 @@ static int __init tegra20_init_rtc(struct device_node *np) else clk_prepare_enable(clk); - return register_persistent_clock(NULL, tegra_read_persistent_clock64); + return register_persistent_clock(tegra_read_persistent_clock64); } TIMER_OF_DECLARE(tegra20_rtc, "nvidia,tegra20-rtc", tegra20_init_rtc); -- cgit v1.2.3 From cf7a63ef4e0203f6f33284c69e8188d91422de83 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 19 Jul 2018 16:55:38 -0400 Subject: x86/tsc: Calibrate tsc only once During boot tsc is calibrated twice: once in tsc_early_delay_calibrate(), and the second time in tsc_init(). Rename tsc_early_delay_calibrate() to tsc_early_init(), and rework it so the calibration is done only early, and make tsc_init() to use the values already determined in tsc_early_init(). Sometimes it is not possible to determine tsc early, as the subsystem that is required is not yet initialized, in such case try again later in tsc_init(). Suggested-by: Thomas Gleixner Signed-off-by: Pavel Tatashin Signed-off-by: Thomas Gleixner Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: peterz@infradead.org Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180719205545.16512-20-pasha.tatashin@oracle.com --- arch/x86/include/asm/tsc.h | 2 +- arch/x86/kernel/setup.c | 2 +- arch/x86/kernel/tsc.c | 87 +++++++++++++++++++++++++--------------------- 3 files changed, 49 insertions(+), 42 deletions(-) diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 2701d221583a..c4368ff73652 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -33,7 +33,7 @@ static inline cycles_t get_cycles(void) extern struct system_counterval_t convert_art_to_tsc(u64 art); extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns); -extern void tsc_early_delay_calibrate(void); +extern void tsc_early_init(void); extern void tsc_init(void); extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 7490de925a81..5d32c55aeb8b 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1014,6 +1014,7 @@ void __init setup_arch(char **cmdline_p) */ init_hypervisor_platform(); + tsc_early_init(); x86_init.resources.probe_roms(); /* after parse_early_param, so could debug it */ @@ -1199,7 +1200,6 @@ void __init setup_arch(char **cmdline_p) memblock_find_dma_reserve(); - tsc_early_delay_calibrate(); if (!early_xdbc_setup_hardware()) early_xdbc_register_console(); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 186395041725..4cab2236169e 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -33,6 +33,8 @@ EXPORT_SYMBOL(cpu_khz); unsigned int __read_mostly tsc_khz; EXPORT_SYMBOL(tsc_khz); +#define KHZ 1000 + /* * TSC can be unstable due to cpufreq or due to unsynced TSCs */ @@ -1335,34 +1337,10 @@ unreg: */ device_initcall(init_tsc_clocksource); -void __init tsc_early_delay_calibrate(void) -{ - unsigned long lpj; - - if (!boot_cpu_has(X86_FEATURE_TSC)) - return; - - cpu_khz = x86_platform.calibrate_cpu(); - tsc_khz = x86_platform.calibrate_tsc(); - - tsc_khz = tsc_khz ? : cpu_khz; - if (!tsc_khz) - return; - - lpj = tsc_khz * 1000; - do_div(lpj, HZ); - loops_per_jiffy = lpj; -} - -void __init tsc_init(void) +static bool __init determine_cpu_tsc_frequencies(void) { - u64 lpj, cyc; - int cpu; - - if (!boot_cpu_has(X86_FEATURE_TSC)) { - setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); - return; - } + /* Make sure that cpu and tsc are not already calibrated */ + WARN_ON(cpu_khz || tsc_khz); cpu_khz = x86_platform.calibrate_cpu(); tsc_khz = x86_platform.calibrate_tsc(); @@ -1377,20 +1355,52 @@ void __init tsc_init(void) else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz) cpu_khz = tsc_khz; - if (!tsc_khz) { - mark_tsc_unstable("could not calculate TSC khz"); - setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); - return; - } + if (tsc_khz == 0) + return false; pr_info("Detected %lu.%03lu MHz processor\n", - (unsigned long)cpu_khz / 1000, - (unsigned long)cpu_khz % 1000); + (unsigned long)cpu_khz / KHZ, + (unsigned long)cpu_khz % KHZ); if (cpu_khz != tsc_khz) { pr_info("Detected %lu.%03lu MHz TSC", - (unsigned long)tsc_khz / 1000, - (unsigned long)tsc_khz % 1000); + (unsigned long)tsc_khz / KHZ, + (unsigned long)tsc_khz % KHZ); + } + return true; +} + +static unsigned long __init get_loops_per_jiffy(void) +{ + unsigned long lpj = tsc_khz * KHZ; + + do_div(lpj, HZ); + return lpj; +} + +void __init tsc_early_init(void) +{ + if (!boot_cpu_has(X86_FEATURE_TSC)) + return; + if (!determine_cpu_tsc_frequencies()) + return; + loops_per_jiffy = get_loops_per_jiffy(); +} + +void __init tsc_init(void) +{ + if (!boot_cpu_has(X86_FEATURE_TSC)) { + setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); + return; + } + + if (!tsc_khz) { + /* We failed to determine frequencies earlier, try again */ + if (!determine_cpu_tsc_frequencies()) { + mark_tsc_unstable("could not calculate TSC khz"); + setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); + return; + } } /* Sanitize TSC ADJUST before cyc2ns gets initialized */ @@ -1413,10 +1423,7 @@ void __init tsc_init(void) if (!no_sched_irq_time) enable_sched_clock_irqtime(); - lpj = ((u64)tsc_khz * 1000); - do_div(lpj, HZ); - lpj_fine = lpj; - + lpj_fine = get_loops_per_jiffy(); use_tsc_delay(); check_system_tsc_reliable(); -- cgit v1.2.3 From e2a9ca29b5edc89da2fddeae30e1070b272395c5 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 19 Jul 2018 16:55:39 -0400 Subject: x86/tsc: Initialize cyc2ns when tsc frequency is determined cyc2ns converts tsc to nanoseconds, and it is handled in a per-cpu data structure. Currently, the setup code for c2ns data for every possible CPU goes through the same sequence of calculations as for the boot CPU, but is based on the same tsc frequency as the boot CPU, and thus this is not necessary. Initialize the boot cpu when tsc frequency is determined. Copy the calculated data from the boot CPU to the other CPUs in tsc_init(). In addition do the following: - Remove unnecessary zeroing of c2ns data by removing cyc2ns_data_init() - Split set_cyc2ns_scale() into two functions, so set_cyc2ns_scale() can be called when system is up, and wraps around __set_cyc2ns_scale() that can be called directly when system is booting but avoids saving restoring IRQs and going and waking up from idle. Suggested-by: Thomas Gleixner Signed-off-by: Pavel Tatashin Signed-off-by: Thomas Gleixner Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: peterz@infradead.org Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180719205545.16512-21-pasha.tatashin@oracle.com --- arch/x86/kernel/tsc.c | 94 +++++++++++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 41 deletions(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 4cab2236169e..7ea0718a4c75 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -103,23 +103,6 @@ void cyc2ns_read_end(void) * -johnstul@us.ibm.com "math is hard, lets go shopping!" */ -static void cyc2ns_data_init(struct cyc2ns_data *data) -{ - data->cyc2ns_mul = 0; - data->cyc2ns_shift = 0; - data->cyc2ns_offset = 0; -} - -static void __init cyc2ns_init(int cpu) -{ - struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); - - cyc2ns_data_init(&c2n->data[0]); - cyc2ns_data_init(&c2n->data[1]); - - seqcount_init(&c2n->seq); -} - static inline unsigned long long cycles_2_ns(unsigned long long cyc) { struct cyc2ns_data data; @@ -135,18 +118,11 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc) return ns; } -static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now) +static void __set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now) { unsigned long long ns_now; struct cyc2ns_data data; struct cyc2ns *c2n; - unsigned long flags; - - local_irq_save(flags); - sched_clock_idle_sleep_event(); - - if (!khz) - goto done; ns_now = cycles_2_ns(tsc_now); @@ -178,12 +154,55 @@ static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_ c2n->data[0] = data; raw_write_seqcount_latch(&c2n->seq); c2n->data[1] = data; +} + +static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now) +{ + unsigned long flags; + + local_irq_save(flags); + sched_clock_idle_sleep_event(); + + if (khz) + __set_cyc2ns_scale(khz, cpu, tsc_now); -done: sched_clock_idle_wakeup_event(); local_irq_restore(flags); } +/* + * Initialize cyc2ns for boot cpu + */ +static void __init cyc2ns_init_boot_cpu(void) +{ + struct cyc2ns *c2n = this_cpu_ptr(&cyc2ns); + + seqcount_init(&c2n->seq); + __set_cyc2ns_scale(tsc_khz, smp_processor_id(), rdtsc()); +} + +/* + * Secondary CPUs do not run through cyc2ns_init(), so set up + * all the scale factors for all CPUs, assuming the same + * speed as the bootup CPU. (cpufreq notifiers will fix this + * up if their speed diverges) + */ +static void __init cyc2ns_init_secondary_cpus(void) +{ + unsigned int cpu, this_cpu = smp_processor_id(); + struct cyc2ns *c2n = this_cpu_ptr(&cyc2ns); + struct cyc2ns_data *data = c2n->data; + + for_each_possible_cpu(cpu) { + if (cpu != this_cpu) { + seqcount_init(&c2n->seq); + c2n = per_cpu_ptr(&cyc2ns, cpu); + c2n->data[0] = data[0]; + c2n->data[1] = data[1]; + } + } +} + /* * Scheduler clock - returns current time in nanosec units. */ @@ -1385,6 +1404,10 @@ void __init tsc_early_init(void) if (!determine_cpu_tsc_frequencies()) return; loops_per_jiffy = get_loops_per_jiffy(); + + /* Sanitize TSC ADJUST before cyc2ns gets initialized */ + tsc_store_and_check_tsc_adjust(true); + cyc2ns_init_boot_cpu(); } void __init tsc_init(void) @@ -1401,23 +1424,12 @@ void __init tsc_init(void) setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); return; } + /* Sanitize TSC ADJUST before cyc2ns gets initialized */ + tsc_store_and_check_tsc_adjust(true); + cyc2ns_init_boot_cpu(); } - /* Sanitize TSC ADJUST before cyc2ns gets initialized */ - tsc_store_and_check_tsc_adjust(true); - - /* - * Secondary CPUs do not run through tsc_init(), so set up - * all the scale factors for all CPUs, assuming the same - * speed as the bootup CPU. (cpufreq notifiers will fix this - * up if their speed diverges) - */ - cyc = rdtsc(); - for_each_possible_cpu(cpu) { - cyc2ns_init(cpu); - set_cyc2ns_scale(tsc_khz, cpu, cyc); - } - + cyc2ns_init_secondary_cpus(); static_branch_enable(&__use_tsc); if (!no_sched_irq_time) -- cgit v1.2.3 From 4763f03d3d186ce8a1125844790152d76804ad60 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 19 Jul 2018 16:55:40 -0400 Subject: x86/tsc: Use TSC as sched clock early All prerequesites for enabling TSC as sched clock early in the boot process are available now: - Early attempt of TSC calibration - Early availablity of static branch patching If TSC frequency can be established in the early calibration, enable the static key which switches sched clock to use TSC. [ tglx: Massaged changelog ] Signed-off-by: Pavel Tatashin Signed-off-by: Thomas Gleixner Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: peterz@infradead.org Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180719205545.16512-22-pasha.tatashin@oracle.com --- arch/x86/kernel/tsc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 7ea0718a4c75..9277ae9b68b3 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1408,6 +1408,7 @@ void __init tsc_early_init(void) /* Sanitize TSC ADJUST before cyc2ns gets initialized */ tsc_store_and_check_tsc_adjust(true); cyc2ns_init_boot_cpu(); + static_branch_enable(&__use_tsc); } void __init tsc_init(void) -- cgit v1.2.3 From 5d2a4e91a541cb04d20d11602f0f9340291322ac Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 19 Jul 2018 16:55:41 -0400 Subject: sched/clock: Move sched clock initialization and merge with generic clock sched_clock_postinit() initializes a generic clock on systems where no other clock is provided. This function may be called only after timekeeping_init(). Rename sched_clock_postinit to generic_clock_inti() and call it from sched_clock_init(). Move the call for sched_clock_init() until after time_init(). Suggested-by: Peter Zijlstra Signed-off-by: Pavel Tatashin Signed-off-by: Thomas Gleixner Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180719205545.16512-23-pasha.tatashin@oracle.com --- include/linux/sched_clock.h | 5 ++--- init/main.c | 4 ++-- kernel/sched/clock.c | 27 +++++++++++++++++---------- kernel/sched/core.c | 1 - kernel/time/sched_clock.c | 2 +- 5 files changed, 22 insertions(+), 17 deletions(-) diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h index 411b52e424e1..abe28d5cb3f4 100644 --- a/include/linux/sched_clock.h +++ b/include/linux/sched_clock.h @@ -9,17 +9,16 @@ #define LINUX_SCHED_CLOCK #ifdef CONFIG_GENERIC_SCHED_CLOCK -extern void sched_clock_postinit(void); +extern void generic_sched_clock_init(void); extern void sched_clock_register(u64 (*read)(void), int bits, unsigned long rate); #else -static inline void sched_clock_postinit(void) { } +static inline void generic_sched_clock_init(void) { } static inline void sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) { - ; } #endif diff --git a/init/main.c b/init/main.c index 3b4ada11ed52..162d931c9511 100644 --- a/init/main.c +++ b/init/main.c @@ -79,7 +79,7 @@ #include #include #include -#include +#include #include #include #include @@ -642,7 +642,7 @@ asmlinkage __visible void __init start_kernel(void) softirq_init(); timekeeping_init(); time_init(); - sched_clock_postinit(); + sched_clock_init(); printk_safe_init(); perf_event_init(); profile_init(); diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index 10c83e73837a..0e9dbb2d9aea 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -53,6 +53,7 @@ * */ #include "sched.h" +#include /* * Scheduler clock - returns current time in nanosec units. @@ -68,11 +69,6 @@ EXPORT_SYMBOL_GPL(sched_clock); __read_mostly int sched_clock_running; -void sched_clock_init(void) -{ - sched_clock_running = 1; -} - #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK /* * We must start with !__sched_clock_stable because the unstable -> stable @@ -199,6 +195,15 @@ void clear_sched_clock_stable(void) __clear_sched_clock_stable(); } +static void __sched_clock_gtod_offset(void) +{ + __gtod_offset = (sched_clock() + __sched_clock_offset) - ktime_get_ns(); +} + +void __init sched_clock_init(void) +{ + sched_clock_running = 1; +} /* * We run this as late_initcall() such that it runs after all built-in drivers, * notably: acpi_processor and intel_idle, which can mark the TSC as unstable. @@ -385,8 +390,6 @@ void sched_clock_tick(void) void sched_clock_tick_stable(void) { - u64 gtod, clock; - if (!sched_clock_stable()) return; @@ -398,9 +401,7 @@ void sched_clock_tick_stable(void) * TSC to be unstable, any computation will be computing crap. */ local_irq_disable(); - gtod = ktime_get_ns(); - clock = sched_clock(); - __gtod_offset = (clock + __sched_clock_offset) - gtod; + __sched_clock_gtod_offset(); local_irq_enable(); } @@ -434,6 +435,12 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ +void __init sched_clock_init(void) +{ + sched_clock_running = 1; + generic_sched_clock_init(); +} + u64 sched_clock_cpu(int cpu) { if (unlikely(!sched_clock_running)) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fe365c9a08e9..552406e9713b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5954,7 +5954,6 @@ void __init sched_init(void) int i, j; unsigned long alloc_size = 0, ptr; - sched_clock_init(); wait_bit_init(); #ifdef CONFIG_FAIR_GROUP_SCHED diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 2d8f05aad442..cbc72c2c1fca 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -237,7 +237,7 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) pr_debug("Registered %pF as sched_clock source\n", read); } -void __init sched_clock_postinit(void) +void __init generic_sched_clock_init(void) { /* * If no sched_clock() function has been provided at that point, -- cgit v1.2.3 From 857baa87b6422bcfb84ed3631d6839920cb5b09d Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 19 Jul 2018 16:55:42 -0400 Subject: sched/clock: Enable sched clock early Allow sched_clock() to be used before schec_clock_init() is called. This provides a way to get early boot timestamps on machines with unstable clocks. Signed-off-by: Pavel Tatashin Signed-off-by: Thomas Gleixner Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: peterz@infradead.org Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180719205545.16512-24-pasha.tatashin@oracle.com --- init/main.c | 2 +- kernel/sched/clock.c | 20 +++++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/init/main.c b/init/main.c index 162d931c9511..ff0a24170b95 100644 --- a/init/main.c +++ b/init/main.c @@ -642,7 +642,6 @@ asmlinkage __visible void __init start_kernel(void) softirq_init(); timekeeping_init(); time_init(); - sched_clock_init(); printk_safe_init(); perf_event_init(); profile_init(); @@ -697,6 +696,7 @@ asmlinkage __visible void __init start_kernel(void) acpi_early_init(); if (late_time_init) late_time_init(); + sched_clock_init(); calibrate_delay(); pid_idr_init(); anon_vma_init(); diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index 0e9dbb2d9aea..422cd63f8f17 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -202,7 +202,25 @@ static void __sched_clock_gtod_offset(void) void __init sched_clock_init(void) { + unsigned long flags; + + /* + * Set __gtod_offset such that once we mark sched_clock_running, + * sched_clock_tick() continues where sched_clock() left off. + * + * Even if TSC is buggered, we're still UP at this point so it + * can't really be out of sync. + */ + local_irq_save(flags); + __sched_clock_gtod_offset(); + local_irq_restore(flags); + sched_clock_running = 1; + + /* Now that sched_clock_running is set adjust scd */ + local_irq_save(flags); + sched_clock_tick(); + local_irq_restore(flags); } /* * We run this as late_initcall() such that it runs after all built-in drivers, @@ -356,7 +374,7 @@ u64 sched_clock_cpu(int cpu) return sched_clock() + __sched_clock_offset; if (unlikely(!sched_clock_running)) - return 0ull; + return sched_clock(); preempt_disable_notrace(); scd = cpu_sdc(cpu); -- cgit v1.2.3 From 46457ea464f5341d1f9dad8dd213805d45f7f117 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 19 Jul 2018 16:55:43 -0400 Subject: sched/clock: Use static key for sched_clock_running sched_clock_running may be read every time sched_clock_cpu() is called. Yet, this variable is updated only twice during boot, and never changes again, therefore it is better to make it a static key. Signed-off-by: Pavel Tatashin Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180719205545.16512-25-pasha.tatashin@oracle.com --- kernel/sched/clock.c | 16 ++++++++-------- kernel/sched/debug.c | 2 -- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index 422cd63f8f17..c5c47ad3f386 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -67,7 +67,7 @@ unsigned long long __weak sched_clock(void) } EXPORT_SYMBOL_GPL(sched_clock); -__read_mostly int sched_clock_running; +static DEFINE_STATIC_KEY_FALSE(sched_clock_running); #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK /* @@ -191,7 +191,7 @@ void clear_sched_clock_stable(void) smp_mb(); /* matches sched_clock_init_late() */ - if (sched_clock_running == 2) + if (static_key_count(&sched_clock_running.key) == 2) __clear_sched_clock_stable(); } @@ -215,7 +215,7 @@ void __init sched_clock_init(void) __sched_clock_gtod_offset(); local_irq_restore(flags); - sched_clock_running = 1; + static_branch_inc(&sched_clock_running); /* Now that sched_clock_running is set adjust scd */ local_irq_save(flags); @@ -228,7 +228,7 @@ void __init sched_clock_init(void) */ static int __init sched_clock_init_late(void) { - sched_clock_running = 2; + static_branch_inc(&sched_clock_running); /* * Ensure that it is impossible to not do a static_key update. * @@ -373,7 +373,7 @@ u64 sched_clock_cpu(int cpu) if (sched_clock_stable()) return sched_clock() + __sched_clock_offset; - if (unlikely(!sched_clock_running)) + if (!static_branch_unlikely(&sched_clock_running)) return sched_clock(); preempt_disable_notrace(); @@ -396,7 +396,7 @@ void sched_clock_tick(void) if (sched_clock_stable()) return; - if (unlikely(!sched_clock_running)) + if (!static_branch_unlikely(&sched_clock_running)) return; lockdep_assert_irqs_disabled(); @@ -455,13 +455,13 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); void __init sched_clock_init(void) { - sched_clock_running = 1; + static_branch_inc(&sched_clock_running); generic_sched_clock_init(); } u64 sched_clock_cpu(int cpu) { - if (unlikely(!sched_clock_running)) + if (!static_branch_unlikely(&sched_clock_running)) return 0; return sched_clock(); diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index e593b4118578..b0212f489a33 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -623,8 +623,6 @@ void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq) #undef PU } -extern __read_mostly int sched_clock_running; - static void print_cpu(struct seq_file *m, int cpu) { struct rq *rq = cpu_rq(cpu); -- cgit v1.2.3 From 03821f451d2d2d7599061244734245be139014ea Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 19 Jul 2018 16:55:44 -0400 Subject: x86/tsc: Split native_calibrate_cpu() into early and late parts During early boot TSC and CPU frequency can be calibrated using MSR, CPUID, and quick PIT calibration methods. The other methods PIT/HPET/PMTIMER are available only after ACPI is initialized. Split native_calibrate_cpu() into early and late parts so they can be called separately during early and late tsc calibration. Signed-off-by: Pavel Tatashin Signed-off-by: Thomas Gleixner Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: peterz@infradead.org Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180719205545.16512-26-pasha.tatashin@oracle.com --- arch/x86/include/asm/tsc.h | 1 + arch/x86/kernel/tsc.c | 54 ++++++++++++++++++++++++++++++---------------- 2 files changed, 37 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index c4368ff73652..88140e4f2292 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -40,6 +40,7 @@ extern int unsynchronized_tsc(void); extern int check_tsc_unstable(void); extern void mark_tsc_async_resets(char *reason); extern unsigned long native_calibrate_cpu(void); +extern unsigned long native_calibrate_cpu_early(void); extern unsigned long native_calibrate_tsc(void); extern unsigned long long native_sched_clock_from_tsc(u64 tsc); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 9277ae9b68b3..60586779b02c 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -680,30 +680,17 @@ static unsigned long cpu_khz_from_cpuid(void) return eax_base_mhz * 1000; } -/** - * native_calibrate_cpu - calibrate the cpu on boot +/* + * calibrate cpu using pit, hpet, and ptimer methods. They are available + * later in boot after acpi is initialized. */ -unsigned long native_calibrate_cpu(void) +static unsigned long pit_hpet_ptimer_calibrate_cpu(void) { u64 tsc1, tsc2, delta, ref1, ref2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; - unsigned long flags, latch, ms, fast_calibrate; + unsigned long flags, latch, ms; int hpet = is_hpet_enabled(), i, loopmin; - fast_calibrate = cpu_khz_from_cpuid(); - if (fast_calibrate) - return fast_calibrate; - - fast_calibrate = cpu_khz_from_msr(); - if (fast_calibrate) - return fast_calibrate; - - local_irq_save(flags); - fast_calibrate = quick_pit_calibrate(); - local_irq_restore(flags); - if (fast_calibrate) - return fast_calibrate; - /* * Run 5 calibration loops to get the lowest frequency value * (the best estimate). We use two different calibration modes @@ -846,6 +833,37 @@ unsigned long native_calibrate_cpu(void) return tsc_pit_min; } +/** + * native_calibrate_cpu_early - can calibrate the cpu early in boot + */ +unsigned long native_calibrate_cpu_early(void) +{ + unsigned long flags, fast_calibrate = cpu_khz_from_cpuid(); + + if (!fast_calibrate) + fast_calibrate = cpu_khz_from_msr(); + if (!fast_calibrate) { + local_irq_save(flags); + fast_calibrate = quick_pit_calibrate(); + local_irq_restore(flags); + } + return fast_calibrate; +} + + +/** + * native_calibrate_cpu - calibrate the cpu + */ +unsigned long native_calibrate_cpu(void) +{ + unsigned long tsc_freq = native_calibrate_cpu_early(); + + if (!tsc_freq) + tsc_freq = pit_hpet_ptimer_calibrate_cpu(); + + return tsc_freq; +} + void recalibrate_cpu_khz(void) { #ifndef CONFIG_SMP -- cgit v1.2.3 From 8dbe438589f373544a1af8b4a859e4da853c0f90 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Thu, 19 Jul 2018 16:55:45 -0400 Subject: x86/tsc: Make use of tsc_calibrate_cpu_early() During early boot enable tsc_calibrate_cpu_early() and switch to tsc_calibrate_cpu() only later. Do this unconditionally, because it is unknown what methods other cpus will use to calibrate once they are onlined. If by the time tsc_init() is called tsc frequency is still unknown do only pit_hpet_ptimer_calibrate_cpu() to calibrate, as this function contains the only methods wich have not been called and tried earlier. Signed-off-by: Pavel Tatashin Signed-off-by: Thomas Gleixner Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: peterz@infradead.org Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180719205545.16512-27-pasha.tatashin@oracle.com --- arch/x86/include/asm/tsc.h | 1 - arch/x86/kernel/tsc.c | 25 +++++++++++++++++++------ arch/x86/kernel/x86_init.c | 2 +- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 88140e4f2292..eb5bbfeccb66 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -39,7 +39,6 @@ extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); extern int check_tsc_unstable(void); extern void mark_tsc_async_resets(char *reason); -extern unsigned long native_calibrate_cpu(void); extern unsigned long native_calibrate_cpu_early(void); extern unsigned long native_calibrate_tsc(void); extern unsigned long long native_sched_clock_from_tsc(u64 tsc); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 60586779b02c..02e416b87ac1 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -854,7 +854,7 @@ unsigned long native_calibrate_cpu_early(void) /** * native_calibrate_cpu - calibrate the cpu */ -unsigned long native_calibrate_cpu(void) +static unsigned long native_calibrate_cpu(void) { unsigned long tsc_freq = native_calibrate_cpu_early(); @@ -1374,13 +1374,19 @@ unreg: */ device_initcall(init_tsc_clocksource); -static bool __init determine_cpu_tsc_frequencies(void) +static bool __init determine_cpu_tsc_frequencies(bool early) { /* Make sure that cpu and tsc are not already calibrated */ WARN_ON(cpu_khz || tsc_khz); - cpu_khz = x86_platform.calibrate_cpu(); - tsc_khz = x86_platform.calibrate_tsc(); + if (early) { + cpu_khz = x86_platform.calibrate_cpu(); + tsc_khz = x86_platform.calibrate_tsc(); + } else { + /* We should not be here with non-native cpu calibration */ + WARN_ON(x86_platform.calibrate_cpu != native_calibrate_cpu); + cpu_khz = pit_hpet_ptimer_calibrate_cpu(); + } /* * Trust non-zero tsc_khz as authorative, @@ -1419,7 +1425,7 @@ void __init tsc_early_init(void) { if (!boot_cpu_has(X86_FEATURE_TSC)) return; - if (!determine_cpu_tsc_frequencies()) + if (!determine_cpu_tsc_frequencies(true)) return; loops_per_jiffy = get_loops_per_jiffy(); @@ -1431,6 +1437,13 @@ void __init tsc_early_init(void) void __init tsc_init(void) { + /* + * native_calibrate_cpu_early can only calibrate using methods that are + * available early in boot. + */ + if (x86_platform.calibrate_cpu == native_calibrate_cpu_early) + x86_platform.calibrate_cpu = native_calibrate_cpu; + if (!boot_cpu_has(X86_FEATURE_TSC)) { setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); return; @@ -1438,7 +1451,7 @@ void __init tsc_init(void) if (!tsc_khz) { /* We failed to determine frequencies earlier, try again */ - if (!determine_cpu_tsc_frequencies()) { + if (!determine_cpu_tsc_frequencies(false)) { mark_tsc_unstable("could not calculate TSC khz"); setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); return; diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 3ab867603e81..2792b5573818 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -109,7 +109,7 @@ struct x86_cpuinit_ops x86_cpuinit = { static void default_nmi_init(void) { }; struct x86_platform_ops x86_platform __ro_after_init = { - .calibrate_cpu = native_calibrate_cpu, + .calibrate_cpu = native_calibrate_cpu_early, .calibrate_tsc = native_calibrate_tsc, .get_wallclock = mach_get_cmos_time, .set_wallclock = mach_set_rtc_mmss, -- cgit v1.2.3 From 9407f5a7ee77c631d1e100436132437cf6237e45 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Jul 2018 10:09:11 +0200 Subject: sched/clock: Close a hole in sched_clock_init() All data required for the 'unstable' sched_clock must be set-up _before_ enabling it -- setting sched_clock_running. This includes the __gtod_offset but also a recent scd stamp. Make the gtod-offset update also set the csd stamp -- it requires the same two clock reads _anyway_. This doesn't hurt in the sched_clock_tick_stable() case and ensures sched_clock_init() gets everything set-up before use. Also switch to unconditional IRQ-disable/enable because the static key stuff already requires this is not ran with IRQs disabled. Fixes: 857baa87b642 ("sched/clock: Enable sched clock early") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: Pavel Tatashin Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: linux@armlinux.org.uk Cc: schwidefsky@de.ibm.com Cc: heiko.carstens@de.ibm.com Cc: john.stultz@linaro.org Cc: sboyd@codeaurora.org Cc: hpa@zytor.com Cc: douly.fnst@cn.fujitsu.com Cc: prarit@redhat.com Cc: feng.tang@intel.com Cc: pmladek@suse.com Cc: gnomes@lxorguk.ukuu.org.uk Cc: linux-s390@vger.kernel.org Cc: boris.ostrovsky@oracle.com Cc: jgross@suse.com Cc: pbonzini@redhat.com Link: https://lkml.kernel.org/r/20180720080911.GM2494@hirez.programming.kicks-ass.net --- kernel/sched/clock.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index c5c47ad3f386..811a39aca1ce 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -197,13 +197,14 @@ void clear_sched_clock_stable(void) static void __sched_clock_gtod_offset(void) { - __gtod_offset = (sched_clock() + __sched_clock_offset) - ktime_get_ns(); + struct sched_clock_data *scd = this_scd(); + + __scd_stamp(scd); + __gtod_offset = (scd->tick_raw + __sched_clock_offset) - scd->tick_gtod; } void __init sched_clock_init(void) { - unsigned long flags; - /* * Set __gtod_offset such that once we mark sched_clock_running, * sched_clock_tick() continues where sched_clock() left off. @@ -211,16 +212,11 @@ void __init sched_clock_init(void) * Even if TSC is buggered, we're still UP at this point so it * can't really be out of sync. */ - local_irq_save(flags); + local_irq_disable(); __sched_clock_gtod_offset(); - local_irq_restore(flags); + local_irq_enable(); static_branch_inc(&sched_clock_running); - - /* Now that sched_clock_running is set adjust scd */ - local_irq_save(flags); - sched_clock_tick(); - local_irq_restore(flags); } /* * We run this as late_initcall() such that it runs after all built-in drivers, -- cgit v1.2.3 From 684ad537abff987886d63fb3c573eeca40d7f2db Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Wed, 25 Jul 2018 16:00:18 -0400 Subject: timekeeping: Prevent false warning when persistent clock is not available On arches with no persistent clock a message like this is printed during boot: [ 0.000000] Persistent clock returned invalid value The value is not invalid: Zero means that no persistent clock is available and the absence of persistent clock should be quietly accepted. Fixes: 3eca993740b8 ("timekeeping: Replace read_boot_clock64() with read_persistent_wall_and_boot_offset()") Signed-off-by: Pavel Tatashin Signed-off-by: Thomas Gleixner Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Cc: sboyd@kernel.org Cc: john.stultz@linaro.org Link: https://lkml.kernel.org/r/20180725200018.23722-1-pasha.tatashin@oracle.com --- kernel/time/timekeeping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 30d7f64ffc87..6183e7460138 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1536,7 +1536,7 @@ void __init timekeeping_init(void) if (timespec64_valid_strict(&wall_time) && timespec64_to_ns(&wall_time) > 0) { persistent_clock_exists = true; - } else { + } else if (timespec64_to_ns(&wall_time) != 0) { pr_warn("Persistent clock returned invalid value"); wall_time = (struct timespec64){0}; } -- cgit v1.2.3 From bd9f943e5d2a42d864f9692477a25034c9d47dcc Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Mon, 30 Jul 2018 09:52:52 -0400 Subject: sched/clock: Disable interrupts when calling generic_sched_clock_init() sched_clock_init() used be called early during boot when interrupts were still disabled. After the recent changes to utilize sched clock early the sched_clock_init() call happens when interrupts are already enabled, which triggers the following warning: WARNING: CPU: 0 PID: 0 at kernel/time/sched_clock.c:180 sched_clock_register+0x44/0x278 [] (warn_slowpath_null) from [] (sched_clock_register+0x44/0x278) [] (sched_clock_register) from [] (generic_sched_clock_init+0x28/0x88) [] (generic_sched_clock_init) from [] (sched_clock_init+0x54/0x74) [] (sched_clock_init) from [] (start_kernel+0x310/0x3e4) [] (start_kernel) from [<00000000>] ( (null)) Disable IRQs for the duration of generic_sched_clock_init(). Fixes: 857baa87b642 ("sched/clock: Enable sched clock early") Signed-off-by: Pavel Tatashin Reported-by: Guenter Roeck Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: steven.sistare@oracle.com Cc: daniel.m.jordan@oracle.com Link: https://lkml.kernel.org/r/20180730135252.24599-1-pasha.tatashin@oracle.com --- kernel/sched/clock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index 811a39aca1ce..e3e3b979f9bd 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -452,7 +452,9 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); void __init sched_clock_init(void) { static_branch_inc(&sched_clock_running); + local_irq_disable(); generic_sched_clock_init(); + local_irq_enable(); } u64 sched_clock_cpu(int cpu) -- cgit v1.2.3 From 608008a45798fe9e2aee04f99b5270ea57c1376f Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Mon, 30 Jul 2018 15:54:20 +0800 Subject: x86/tsc: Consolidate init code Split out suplicated code from tsc_early_init() and tsc_init() into a common helper and fixup some comment typos. [ tglx: Massaged changelog and renamed function ] Signed-off-by: Dou Liyang Signed-off-by: Thomas Gleixner Reviewed-by: Pavel Tatashin Cc: Cc: Peter Zijlstra Cc: "H. Peter Anvin" Link: https://lkml.kernel.org/r/20180730075421.22830-2-douly.fnst@cn.fujitsu.com --- arch/x86/kernel/tsc.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 02e416b87ac1..1463468ba9a0 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -182,7 +182,7 @@ static void __init cyc2ns_init_boot_cpu(void) } /* - * Secondary CPUs do not run through cyc2ns_init(), so set up + * Secondary CPUs do not run through tsc_init(), so set up * all the scale factors for all CPUs, assuming the same * speed as the bootup CPU. (cpufreq notifiers will fix this * up if their speed diverges) @@ -1389,7 +1389,7 @@ static bool __init determine_cpu_tsc_frequencies(bool early) } /* - * Trust non-zero tsc_khz as authorative, + * Trust non-zero tsc_khz as authoritative, * and use it to sanity check cpu_khz, * which will be off if system timer is off. */ @@ -1421,6 +1421,14 @@ static unsigned long __init get_loops_per_jiffy(void) return lpj; } +static void __init tsc_enable_sched_clock(void) +{ + /* Sanitize TSC ADJUST before cyc2ns gets initialized */ + tsc_store_and_check_tsc_adjust(true); + cyc2ns_init_boot_cpu(); + static_branch_enable(&__use_tsc); +} + void __init tsc_early_init(void) { if (!boot_cpu_has(X86_FEATURE_TSC)) @@ -1429,10 +1437,7 @@ void __init tsc_early_init(void) return; loops_per_jiffy = get_loops_per_jiffy(); - /* Sanitize TSC ADJUST before cyc2ns gets initialized */ - tsc_store_and_check_tsc_adjust(true); - cyc2ns_init_boot_cpu(); - static_branch_enable(&__use_tsc); + tsc_enable_sched_clock(); } void __init tsc_init(void) @@ -1456,13 +1461,10 @@ void __init tsc_init(void) setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); return; } - /* Sanitize TSC ADJUST before cyc2ns gets initialized */ - tsc_store_and_check_tsc_adjust(true); - cyc2ns_init_boot_cpu(); + tsc_enable_sched_clock(); } cyc2ns_init_secondary_cpus(); - static_branch_enable(&__use_tsc); if (!no_sched_irq_time) enable_sched_clock_irqtime(); -- cgit v1.2.3 From 1088c6eef261939bda8346ec35b513790a2111d5 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Mon, 30 Jul 2018 15:54:21 +0800 Subject: x86/kvmclock: Mark kvm_get_preset_lpj() as __init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kvm_get_preset_lpj() is only called from kvmclock_init(), so mark it __init as well. Signed-off-by: Dou Liyang Signed-off-by: Thomas Gleixner Reviewed-by: Pavel Tatashin Cc: Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Cc: "H. Peter Anvin" Cc: "Radim Krčmář" Cc: kvm@vger.kernel.org Link: https://lkml.kernel.org/r/20180730075421.22830-3-douly.fnst@cn.fujitsu.com --- arch/x86/kernel/kvmclock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 91b94c0ae4e3..d2edd7e6c294 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -145,7 +145,7 @@ static unsigned long kvm_get_tsc_khz(void) return pvclock_tsc_khz(this_cpu_pvti()); } -static void kvm_get_preset_lpj(void) +static void __init kvm_get_preset_lpj(void) { unsigned long khz; u64 lpj; -- cgit v1.2.3