summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-11-26 09:43:34 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2019-11-26 09:43:34 -0800
commit64d6a12094f35d644540c15440874723b1887f9d (patch)
tree84a3517ad84f18504abf72f4280885d15ceb32c9
parentcd4771f7709211082cbc41dc1f5b2be774ef1604 (diff)
parent4df4cb9e99f83b70d54bc0e25081ac23cceafcbc (diff)
downloadlwn-64d6a12094f35d644540c15440874723b1887f9d.tar.gz
lwn-64d6a12094f35d644540c15440874723b1887f9d.zip
Merge branch 'x86-hyperv-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 hyperv updates from Ingo Molnar: "Misc updates to the hyperv guest code: - Rework clockevents initialization to better support hibernation - Allow guests to enable InvariantTSC - Micro-optimize send_ipi_one" * 'x86-hyperv-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/hyperv: Initialize clockevents earlier in CPU onlining x86/hyperv: Allow guests to enable InvariantTSC x86/hyperv: Micro-optimize send_ipi_one()
-rw-r--r--arch/x86/hyperv/hv_apic.c16
-rw-r--r--arch/x86/hyperv/hv_init.c6
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h5
-rw-r--r--arch/x86/include/asm/trace/hyperv.h15
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c7
-rw-r--r--drivers/clocksource/hyperv_timer.c154
-rw-r--r--drivers/hv/hv.c4
-rw-r--r--drivers/hv/vmbus_drv.c30
-rw-r--r--include/clocksource/hyperv_timer.h7
-rw-r--r--include/linux/cpuhotplug.h1
10 files changed, 190 insertions, 55 deletions
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index e01078e93dd3..40e0e322161d 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -194,10 +194,20 @@ do_ex_hypercall:
static bool __send_ipi_one(int cpu, int vector)
{
- struct cpumask mask = CPU_MASK_NONE;
+ int vp = hv_cpu_number_to_vp_number(cpu);
- cpumask_set_cpu(cpu, &mask);
- return __send_ipi_mask(&mask, vector);
+ trace_hyperv_send_ipi_one(cpu, vector);
+
+ if (!hv_hypercall_pg || (vp == VP_INVAL))
+ return false;
+
+ if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
+ return false;
+
+ if (vp >= 64)
+ return __send_ipi_mask_ex(cpumask_of(cpu), vector);
+
+ return !hv_do_fast_hypercall16(HVCALL_SEND_IPI, vector, BIT_ULL(vp));
}
static void hv_send_ipi(int cpu, int vector)
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 2db3972c0e0f..50ff030d9224 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -311,6 +311,12 @@ void __init hyperv_init(void)
hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+ /*
+ * Ignore any errors in setting up stimer clockevents
+ * as we can run with the LAPIC timer as a fallback.
+ */
+ (void)hv_stimer_alloc();
+
hv_apic_init();
x86_init.pci.arch_init = hv_pci_init;
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 7741e211f7f5..5f10f7f2098d 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -86,6 +86,8 @@
#define HV_X64_ACCESS_FREQUENCY_MSRS BIT(11)
/* AccessReenlightenmentControls privilege */
#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13)
+/* AccessTscInvariantControls privilege */
+#define HV_X64_ACCESS_TSC_INVARIANT BIT(15)
/*
* Feature identification: indicates which flags were specified at partition
@@ -278,6 +280,9 @@
#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
+/* TSC invariant control */
+#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118
+
/*
* Declare the MSR used to setup pages used to communicate with the hypervisor.
*/
diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h
index ace464f09681..4d705cb4d63b 100644
--- a/arch/x86/include/asm/trace/hyperv.h
+++ b/arch/x86/include/asm/trace/hyperv.h
@@ -71,6 +71,21 @@ TRACE_EVENT(hyperv_send_ipi_mask,
__entry->ncpus, __entry->vector)
);
+TRACE_EVENT(hyperv_send_ipi_one,
+ TP_PROTO(int cpu,
+ int vector),
+ TP_ARGS(cpu, vector),
+ TP_STRUCT__entry(
+ __field(int, cpu)
+ __field(int, vector)
+ ),
+ TP_fast_assign(__entry->cpu = cpu;
+ __entry->vector = vector;
+ ),
+ TP_printk("cpu %d vector %x",
+ __entry->cpu, __entry->vector)
+ );
+
#endif /* CONFIG_HYPERV */
#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index c656d92cd708..caa032ce3fe3 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -290,7 +290,12 @@ static void __init ms_hyperv_init_platform(void)
machine_ops.shutdown = hv_machine_shutdown;
machine_ops.crash_shutdown = hv_machine_crash_shutdown;
#endif
- mark_tsc_unstable("running on Hyper-V");
+ if (ms_hyperv.features & HV_X64_ACCESS_TSC_INVARIANT) {
+ wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL, 0x1);
+ setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
+ } else {
+ mark_tsc_unstable("running on Hyper-V");
+ }
/*
* Generation 2 instances don't support reading the NMI status from
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index 2317d4e3daaf..287d8d58c21a 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -17,6 +17,7 @@
#include <linux/clocksource.h>
#include <linux/sched_clock.h>
#include <linux/mm.h>
+#include <linux/cpuhotplug.h>
#include <clocksource/hyperv_timer.h>
#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>
@@ -30,6 +31,15 @@ static u64 hv_sched_clock_offset __ro_after_init;
* mechanism is used when running on older versions of Hyper-V
* that don't support Direct Mode. While Hyper-V provides
* four stimer's per CPU, Linux uses only stimer0.
+ *
+ * Because Direct Mode does not require processing a VMbus
+ * message, stimer interrupts can be enabled earlier in the
+ * process of booting a CPU, and consistent with when timer
+ * interrupts are enabled for other clocksource drivers.
+ * However, for legacy versions of Hyper-V when Direct Mode
+ * is not enabled, setting up stimer interrupts must be
+ * delayed until VMbus is initialized and can process the
+ * interrupt message.
*/
static bool direct_mode_enabled;
@@ -102,17 +112,12 @@ static int hv_ce_set_oneshot(struct clock_event_device *evt)
/*
* hv_stimer_init - Per-cpu initialization of the clockevent
*/
-void hv_stimer_init(unsigned int cpu)
+static int hv_stimer_init(unsigned int cpu)
{
struct clock_event_device *ce;
- /*
- * Synthetic timers are always available except on old versions of
- * Hyper-V on x86. In that case, just return as Linux will use a
- * clocksource based on emulated PIT or LAPIC timer hardware.
- */
- if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE))
- return;
+ if (!hv_clock_event)
+ return 0;
ce = per_cpu_ptr(hv_clock_event, cpu);
ce->name = "Hyper-V clockevent";
@@ -127,28 +132,55 @@ void hv_stimer_init(unsigned int cpu)
HV_CLOCK_HZ,
HV_MIN_DELTA_TICKS,
HV_MAX_MAX_DELTA_TICKS);
+ return 0;
}
-EXPORT_SYMBOL_GPL(hv_stimer_init);
/*
* hv_stimer_cleanup - Per-cpu cleanup of the clockevent
*/
-void hv_stimer_cleanup(unsigned int cpu)
+int hv_stimer_cleanup(unsigned int cpu)
{
struct clock_event_device *ce;
- /* Turn off clockevent device */
- if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) {
- ce = per_cpu_ptr(hv_clock_event, cpu);
+ if (!hv_clock_event)
+ return 0;
+
+ /*
+ * In the legacy case where Direct Mode is not enabled
+ * (which can only be on x86/64), stimer cleanup happens
+ * relatively early in the CPU offlining process. We
+ * must unbind the stimer-based clockevent device so
+ * that the LAPIC timer can take over until clockevents
+ * are no longer needed in the offlining process. Note
+ * that clockevents_unbind_device() eventually calls
+ * hv_ce_shutdown().
+ *
+ * The unbind should not be done when Direct Mode is
+ * enabled because we may be on an architecture where
+ * there are no other clockevent devices to fallback to.
+ */
+ ce = per_cpu_ptr(hv_clock_event, cpu);
+ if (direct_mode_enabled)
hv_ce_shutdown(ce);
- }
+ else
+ clockevents_unbind_device(ce, cpu);
+
+ return 0;
}
EXPORT_SYMBOL_GPL(hv_stimer_cleanup);
/* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */
-int hv_stimer_alloc(int sint)
+int hv_stimer_alloc(void)
{
- int ret;
+ int ret = 0;
+
+ /*
+ * Synthetic timers are always available except on old versions of
+ * Hyper-V on x86. In that case, return as error as Linux will use a
+ * clockevent based on emulated LAPIC timer hardware.
+ */
+ if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE))
+ return -EINVAL;
hv_clock_event = alloc_percpu(struct clock_event_device);
if (!hv_clock_event)
@@ -159,22 +191,78 @@ int hv_stimer_alloc(int sint)
if (direct_mode_enabled) {
ret = hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector,
hv_stimer0_isr);
- if (ret) {
- free_percpu(hv_clock_event);
- hv_clock_event = NULL;
- return ret;
- }
+ if (ret)
+ goto free_percpu;
+
+ /*
+ * Since we are in Direct Mode, stimer initialization
+ * can be done now with a CPUHP value in the same range
+ * as other clockevent devices.
+ */
+ ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING,
+ "clockevents/hyperv/stimer:starting",
+ hv_stimer_init, hv_stimer_cleanup);
+ if (ret < 0)
+ goto free_stimer0_irq;
}
+ return ret;
- stimer0_message_sint = sint;
- return 0;
+free_stimer0_irq:
+ hv_remove_stimer0_irq(stimer0_irq);
+ stimer0_irq = 0;
+free_percpu:
+ free_percpu(hv_clock_event);
+ hv_clock_event = NULL;
+ return ret;
}
EXPORT_SYMBOL_GPL(hv_stimer_alloc);
+/*
+ * hv_stimer_legacy_init -- Called from the VMbus driver to handle
+ * the case when Direct Mode is not enabled, and the stimer
+ * must be initialized late in the CPU onlining process.
+ *
+ */
+void hv_stimer_legacy_init(unsigned int cpu, int sint)
+{
+ if (direct_mode_enabled)
+ return;
+
+ /*
+ * This function gets called by each vCPU, so setting the
+ * global stimer_message_sint value each time is conceptually
+ * not ideal, but the value passed in is always the same and
+ * it avoids introducing yet another interface into this
+ * clocksource driver just to set the sint in the legacy case.
+ */
+ stimer0_message_sint = sint;
+ (void)hv_stimer_init(cpu);
+}
+EXPORT_SYMBOL_GPL(hv_stimer_legacy_init);
+
+/*
+ * hv_stimer_legacy_cleanup -- Called from the VMbus driver to
+ * handle the case when Direct Mode is not enabled, and the
+ * stimer must be cleaned up early in the CPU offlining
+ * process.
+ */
+void hv_stimer_legacy_cleanup(unsigned int cpu)
+{
+ if (direct_mode_enabled)
+ return;
+ (void)hv_stimer_cleanup(cpu);
+}
+EXPORT_SYMBOL_GPL(hv_stimer_legacy_cleanup);
+
+
/* hv_stimer_free - Free global resources allocated by hv_stimer_alloc() */
void hv_stimer_free(void)
{
- if (direct_mode_enabled && (stimer0_irq != 0)) {
+ if (!hv_clock_event)
+ return;
+
+ if (direct_mode_enabled) {
+ cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING);
hv_remove_stimer0_irq(stimer0_irq);
stimer0_irq = 0;
}
@@ -190,14 +278,20 @@ EXPORT_SYMBOL_GPL(hv_stimer_free);
void hv_stimer_global_cleanup(void)
{
int cpu;
- struct clock_event_device *ce;
- if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) {
- for_each_present_cpu(cpu) {
- ce = per_cpu_ptr(hv_clock_event, cpu);
- clockevents_unbind_device(ce, cpu);
- }
+ /*
+ * hv_stime_legacy_cleanup() will stop the stimer if Direct
+ * Mode is not enabled, and fallback to the LAPIC timer.
+ */
+ for_each_present_cpu(cpu) {
+ hv_stimer_legacy_cleanup(cpu);
}
+
+ /*
+ * If Direct Mode is enabled, the cpuhp teardown callback
+ * (hv_stimer_cleanup) will be run on all CPUs to stop the
+ * stimers.
+ */
hv_stimer_free();
}
EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup);
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index fcc52797c169..6098e0cbdb4b 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -202,7 +202,7 @@ int hv_synic_init(unsigned int cpu)
{
hv_synic_enable_regs(cpu);
- hv_stimer_init(cpu);
+ hv_stimer_legacy_init(cpu, VMBUS_MESSAGE_SINT);
return 0;
}
@@ -277,7 +277,7 @@ int hv_synic_cleanup(unsigned int cpu)
if (channel_found && vmbus_connection.conn_state == CONNECTED)
return -EBUSY;
- hv_stimer_cleanup(cpu);
+ hv_stimer_legacy_cleanup(cpu);
hv_synic_disable_regs(cpu);
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 53a60c81e220..8c06b3361c27 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1340,10 +1340,6 @@ static int vmbus_bus_init(void)
if (ret)
goto err_alloc;
- ret = hv_stimer_alloc(VMBUS_MESSAGE_SINT);
- if (ret < 0)
- goto err_alloc;
-
/*
* Initialize the per-cpu interrupt state and stimer state.
* Then connect to the host.
@@ -1400,9 +1396,8 @@ static int vmbus_bus_init(void)
err_connect:
cpuhp_remove_state(hyperv_cpuhp_online);
err_cpuhp:
- hv_stimer_free();
-err_alloc:
hv_synic_free();
+err_alloc:
hv_remove_vmbus_irq();
bus_unregister(&hv_bus);
@@ -2315,20 +2310,23 @@ static void hv_crash_handler(struct pt_regs *regs)
static int hv_synic_suspend(void)
{
/*
- * When we reach here, all the non-boot CPUs have been offlined, and
- * the stimers on them have been unbound in hv_synic_cleanup() ->
+ * When we reach here, all the non-boot CPUs have been offlined.
+ * If we're in a legacy configuration where stimer Direct Mode is
+ * not enabled, the stimers on the non-boot CPUs have been unbound
+ * in hv_synic_cleanup() -> hv_stimer_legacy_cleanup() ->
* hv_stimer_cleanup() -> clockevents_unbind_device().
*
- * hv_synic_suspend() only runs on CPU0 with interrupts disabled. Here
- * we do not unbind the stimer on CPU0 because: 1) it's unnecessary
- * because the interrupts remain disabled between syscore_suspend()
- * and syscore_resume(): see create_image() and resume_target_kernel();
+ * hv_synic_suspend() only runs on CPU0 with interrupts disabled.
+ * Here we do not call hv_stimer_legacy_cleanup() on CPU0 because:
+ * 1) it's unnecessary as interrupts remain disabled between
+ * syscore_suspend() and syscore_resume(): see create_image() and
+ * resume_target_kernel()
* 2) the stimer on CPU0 is automatically disabled later by
* syscore_suspend() -> timekeeping_suspend() -> tick_suspend() -> ...
- * -> clockevents_shutdown() -> ... -> hv_ce_shutdown(); 3) a warning
- * would be triggered if we call clockevents_unbind_device(), which
- * may sleep, in an interrupts-disabled context. So, we intentionally
- * don't call hv_stimer_cleanup(0) here.
+ * -> clockevents_shutdown() -> ... -> hv_ce_shutdown()
+ * 3) a warning would be triggered if we call
+ * clockevents_unbind_device(), which may sleep, in an
+ * interrupts-disabled context.
*/
hv_synic_disable_regs(0);
diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h
index 422f5e5237be..553e539469f0 100644
--- a/include/clocksource/hyperv_timer.h
+++ b/include/clocksource/hyperv_timer.h
@@ -21,10 +21,11 @@
#define HV_MIN_DELTA_TICKS 1
/* Routines called by the VMbus driver */
-extern int hv_stimer_alloc(int sint);
+extern int hv_stimer_alloc(void);
extern void hv_stimer_free(void);
-extern void hv_stimer_init(unsigned int cpu);
-extern void hv_stimer_cleanup(unsigned int cpu);
+extern int hv_stimer_cleanup(unsigned int cpu);
+extern void hv_stimer_legacy_init(unsigned int cpu, int sint);
+extern void hv_stimer_legacy_cleanup(unsigned int cpu);
extern void hv_stimer_global_cleanup(void);
extern void hv_stimer0_isr(void);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 89d75edb5750..e51ee772b9f5 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -129,6 +129,7 @@ enum cpuhp_state {
CPUHP_AP_ARC_TIMER_STARTING,
CPUHP_AP_RISCV_TIMER_STARTING,
CPUHP_AP_CSKY_TIMER_STARTING,
+ CPUHP_AP_HYPERV_TIMER_STARTING,
CPUHP_AP_KVM_STARTING,
CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING,
CPUHP_AP_KVM_ARM_VGIC_STARTING,