diff options
Diffstat (limited to 'arch/x86/hyperv')
-rw-r--r-- | arch/x86/hyperv/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/hyperv/hv_apic.c | 5 | ||||
-rw-r--r-- | arch/x86/hyperv/hv_init.c | 35 | ||||
-rw-r--r-- | arch/x86/hyperv/hv_proc.c | 198 | ||||
-rw-r--r-- | arch/x86/hyperv/hv_vtl.c | 34 | ||||
-rw-r--r-- | arch/x86/hyperv/irqdomain.c | 6 | ||||
-rw-r--r-- | arch/x86/hyperv/ivm.c | 2 | ||||
-rw-r--r-- | arch/x86/hyperv/mmu.c | 5 |
8 files changed, 54 insertions, 233 deletions
diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile index 3a1548054b48..d55f494f471d 100644 --- a/arch/x86/hyperv/Makefile +++ b/arch/x86/hyperv/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y := hv_init.o mmu.o nested.o irqdomain.o ivm.o -obj-$(CONFIG_X86_64) += hv_apic.o hv_proc.o +obj-$(CONFIG_X86_64) += hv_apic.o obj-$(CONFIG_HYPERV_VTL_MODE) += hv_vtl.o ifdef CONFIG_X86_64 diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index f022d5f64fb6..6d91ac5f9836 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -145,6 +145,11 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector, ipi_arg->vp_set.format = HV_GENERIC_SET_ALL; } + /* + * For this hypercall, Hyper-V treats the valid_bank_mask field + * of ipi_arg->vp_set as part of the fixed size input header. + * So the variable input header size is equal to nr_bank. + */ status = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank, ipi_arg, NULL); diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 173005e6a95d..ddeb40930bc8 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -34,9 +34,6 @@ #include <clocksource/hyperv_timer.h> #include <linux/highmem.h> -u64 hv_current_partition_id = ~0ull; -EXPORT_SYMBOL_GPL(hv_current_partition_id); - void *hv_hypercall_pg; EXPORT_SYMBOL_GPL(hv_hypercall_pg); @@ -93,7 +90,7 @@ static int hv_cpu_init(unsigned int cpu) return 0; hvp = &hv_vp_assist_page[cpu]; - if (hv_root_partition) { + if (hv_root_partition()) { /* * For root partition we get the hypervisor provided VP assist * page, instead of allocating a new page. @@ -245,7 +242,7 @@ static int hv_cpu_die(unsigned int cpu) if (hv_vp_assist_page && hv_vp_assist_page[cpu]) { union hv_vp_assist_msr_contents msr = { 0 }; - if (hv_root_partition) { + if (hv_root_partition()) { /* * For root partition the VP assist page is mapped to * hypervisor provided page, and thus we unmap the @@ -320,7 +317,7 @@ static int hv_suspend(void) union hv_x64_msr_hypercall_contents hypercall_msr; int ret; - if (hv_root_partition) + if (hv_root_partition()) return -EPERM; /* @@ -393,24 +390,6 @@ static void __init hv_stimer_setup_percpu_clockev(void) old_setup_percpu_clockev(); } -static void __init hv_get_partition_id(void) -{ - struct hv_get_partition_id *output_page; - u64 status; - unsigned long flags; - - local_irq_save(flags); - output_page = *this_cpu_ptr(hyperv_pcpu_output_arg); - status = hv_do_hypercall(HVCALL_GET_PARTITION_ID, NULL, output_page); - if (!hv_result_success(status)) { - /* No point in proceeding if this failed */ - pr_err("Failed to get partition ID: %lld\n", status); - BUG(); - } - hv_current_partition_id = output_page->partition_id; - local_irq_restore(flags); -} - #if IS_ENABLED(CONFIG_HYPERV_VTL_MODE) static u8 __init get_vtl(void) { @@ -539,7 +518,7 @@ void __init hyperv_init(void) rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); hypercall_msr.enable = 1; - if (hv_root_partition) { + if (hv_root_partition()) { struct page *pg; void *src; @@ -605,17 +584,15 @@ skip_hypercall_pg_init: register_syscore_ops(&hv_syscore_ops); - if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_ACCESS_PARTITION_ID) + if (ms_hyperv.priv_high & HV_ACCESS_PARTITION_ID) hv_get_partition_id(); - BUG_ON(hv_root_partition && hv_current_partition_id == ~0ull); - #ifdef CONFIG_PCI_MSI /* * If we're running as root, we want to create our own PCI MSI domain. * We can't set this in hv_pci_init because that would be too late. */ - if (hv_root_partition) + if (hv_root_partition()) x86_init.irqs.create_pci_msi_domain = hv_create_pci_msi_domain; #endif diff --git a/arch/x86/hyperv/hv_proc.c b/arch/x86/hyperv/hv_proc.c deleted file mode 100644 index ac4c834d4435..000000000000 --- a/arch/x86/hyperv/hv_proc.c +++ /dev/null @@ -1,198 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/types.h> -#include <linux/vmalloc.h> -#include <linux/mm.h> -#include <linux/clockchips.h> -#include <linux/slab.h> -#include <linux/cpuhotplug.h> -#include <linux/minmax.h> -#include <asm/hypervisor.h> -#include <asm/mshyperv.h> -#include <asm/apic.h> - -#include <asm/trace/hyperv.h> - -/* - * See struct hv_deposit_memory. The first u64 is partition ID, the rest - * are GPAs. - */ -#define HV_DEPOSIT_MAX (HV_HYP_PAGE_SIZE / sizeof(u64) - 1) - -/* Deposits exact number of pages. Must be called with interrupts enabled. */ -int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages) -{ - struct page **pages, *page; - int *counts; - int num_allocations; - int i, j, page_count; - int order; - u64 status; - int ret; - u64 base_pfn; - struct hv_deposit_memory *input_page; - unsigned long flags; - - if (num_pages > HV_DEPOSIT_MAX) - return -E2BIG; - if (!num_pages) - return 0; - - /* One buffer for page pointers and counts */ - page = alloc_page(GFP_KERNEL); - if (!page) - return -ENOMEM; - pages = page_address(page); - - counts = kcalloc(HV_DEPOSIT_MAX, sizeof(int), GFP_KERNEL); - if (!counts) { - free_page((unsigned long)pages); - return -ENOMEM; - } - - /* Allocate all the pages before disabling interrupts */ - i = 0; - - while (num_pages) { - /* Find highest order we can actually allocate */ - order = 31 - __builtin_clz(num_pages); - - while (1) { - pages[i] = alloc_pages_node(node, GFP_KERNEL, order); - if (pages[i]) - break; - if (!order) { - ret = -ENOMEM; - num_allocations = i; - goto err_free_allocations; - } - --order; - } - - split_page(pages[i], order); - counts[i] = 1 << order; - num_pages -= counts[i]; - i++; - } - num_allocations = i; - - local_irq_save(flags); - - input_page = *this_cpu_ptr(hyperv_pcpu_input_arg); - - input_page->partition_id = partition_id; - - /* Populate gpa_page_list - these will fit on the input page */ - for (i = 0, page_count = 0; i < num_allocations; ++i) { - base_pfn = page_to_pfn(pages[i]); - for (j = 0; j < counts[i]; ++j, ++page_count) - input_page->gpa_page_list[page_count] = base_pfn + j; - } - status = hv_do_rep_hypercall(HVCALL_DEPOSIT_MEMORY, - page_count, 0, input_page, NULL); - local_irq_restore(flags); - if (!hv_result_success(status)) { - pr_err("Failed to deposit pages: %lld\n", status); - ret = hv_result(status); - goto err_free_allocations; - } - - ret = 0; - goto free_buf; - -err_free_allocations: - for (i = 0; i < num_allocations; ++i) { - base_pfn = page_to_pfn(pages[i]); - for (j = 0; j < counts[i]; ++j) - __free_page(pfn_to_page(base_pfn + j)); - } - -free_buf: - free_page((unsigned long)pages); - kfree(counts); - return ret; -} - -int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id) -{ - struct hv_input_add_logical_processor *input; - struct hv_output_add_logical_processor *output; - u64 status; - unsigned long flags; - int ret = HV_STATUS_SUCCESS; - - /* - * When adding a logical processor, the hypervisor may return - * HV_STATUS_INSUFFICIENT_MEMORY. When that happens, we deposit more - * pages and retry. - */ - do { - local_irq_save(flags); - - input = *this_cpu_ptr(hyperv_pcpu_input_arg); - /* We don't do anything with the output right now */ - output = *this_cpu_ptr(hyperv_pcpu_output_arg); - - input->lp_index = lp_index; - input->apic_id = apic_id; - input->proximity_domain_info = hv_numa_node_to_pxm_info(node); - status = hv_do_hypercall(HVCALL_ADD_LOGICAL_PROCESSOR, - input, output); - local_irq_restore(flags); - - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { - if (!hv_result_success(status)) { - pr_err("%s: cpu %u apic ID %u, %lld\n", __func__, - lp_index, apic_id, status); - ret = hv_result(status); - } - break; - } - ret = hv_call_deposit_pages(node, hv_current_partition_id, 1); - } while (!ret); - - return ret; -} - -int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags) -{ - struct hv_create_vp *input; - u64 status; - unsigned long irq_flags; - int ret = HV_STATUS_SUCCESS; - - /* Root VPs don't seem to need pages deposited */ - if (partition_id != hv_current_partition_id) { - /* The value 90 is empirically determined. It may change. */ - ret = hv_call_deposit_pages(node, partition_id, 90); - if (ret) - return ret; - } - - do { - local_irq_save(irq_flags); - - input = *this_cpu_ptr(hyperv_pcpu_input_arg); - - input->partition_id = partition_id; - input->vp_index = vp_index; - input->flags = flags; - input->subnode_type = HV_SUBNODE_ANY; - input->proximity_domain_info = hv_numa_node_to_pxm_info(node); - status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL); - local_irq_restore(irq_flags); - - if (hv_result(status) != HV_STATUS_INSUFFICIENT_MEMORY) { - if (!hv_result_success(status)) { - pr_err("%s: vcpu %u, lp %u, %lld\n", __func__, - vp_index, flags, status); - ret = hv_result(status); - } - break; - } - ret = hv_call_deposit_pages(node, partition_id, 1); - - } while (!ret); - - return ret; -} - diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 3f4e20d7b724..13242ed8ff16 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -12,6 +12,7 @@ #include <asm/i8259.h> #include <asm/mshyperv.h> #include <asm/realmode.h> +#include <asm/reboot.h> #include <../kernel/smpboot.h> extern struct boot_params boot_params; @@ -22,6 +23,36 @@ static bool __init hv_vtl_msi_ext_dest_id(void) return true; } +/* + * The `native_machine_emergency_restart` function from `reboot.c` writes + * to the physical address 0x472 to indicate the type of reboot for the + * firmware. We cannot have that in VSM as the memory composition might + * be more generic, and such write effectively corrupts the memory thus + * making diagnostics harder at the very least. + */ +static void __noreturn hv_vtl_emergency_restart(void) +{ + /* + * Cause a triple fault and the immediate reset. Here the code does not run + * on the top of any firmware, whereby cannot reach out to its services. + * The inifinite loop is for the improbable case that the triple fault does + * not work and have to preserve the state intact for debugging. + */ + for (;;) { + idt_invalidate(); + __asm__ __volatile__("int3"); + } +} + +/* + * The only way to restart in the VTL mode is to triple fault as the kernel runs + * as firmware. + */ +static void __noreturn hv_vtl_restart(char __maybe_unused *cmd) +{ + hv_vtl_emergency_restart(); +} + void __init hv_vtl_init_platform(void) { pr_info("Linux runs in Hyper-V Virtual Trust Level\n"); @@ -236,6 +267,9 @@ static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip) int __init hv_vtl_early_init(void) { + machine_ops.emergency_restart = hv_vtl_emergency_restart; + machine_ops.restart = hv_vtl_restart; + /* * `boot_cpu_has` returns the runtime feature support, * and here is the earliest it can be used. diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c index 64b921360b0f..31f0d29cbc5e 100644 --- a/arch/x86/hyperv/irqdomain.c +++ b/arch/x86/hyperv/irqdomain.c @@ -64,7 +64,7 @@ static int hv_map_interrupt(union hv_device_id device_id, bool level, local_irq_restore(flags); if (!hv_result_success(status)) - pr_err("%s: hypercall failed, status %lld\n", __func__, status); + hv_status_err(status, "\n"); return hv_result(status); } @@ -224,7 +224,7 @@ static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) kfree(stored_entry); if (status != HV_STATUS_SUCCESS) { - pr_debug("%s: failed to unmap, status %lld", __func__, status); + hv_status_debug(status, "failed to unmap\n"); return; } } @@ -273,7 +273,7 @@ static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd) status = hv_unmap_msi_interrupt(dev, &old_entry); if (status != HV_STATUS_SUCCESS) - pr_err("%s: hypercall failed, status %lld\n", __func__, status); + hv_status_err(status, "\n"); } static void hv_msi_free_irq(struct irq_domain *domain, diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index ec7880271cf9..77bf05f06b9e 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -338,7 +338,7 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) vmsa->sev_features = sev_status >> 2; ret = snp_set_vmsa(vmsa, true); - if (!ret) { + if (ret) { pr_err("RMPADJUST(%llx) failed: %llx\n", (u64)vmsa, ret); free_page((u64)vmsa); return ret; diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index cc8c3bd0e7c2..cfcb60468b01 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -205,6 +205,10 @@ static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus, /* * We can flush not more than max_gvas with one hypercall. Flush the * whole address space if we were asked to do more. + * + * For these hypercalls, Hyper-V treats the valid_bank_mask field + * of flush->hv_vp_set as part of the fixed size input header. + * So the variable input header size is equal to nr_bank. */ max_gvas = (PAGE_SIZE - sizeof(*flush) - nr_bank * @@ -239,5 +243,4 @@ void hyperv_setup_mmu_ops(void) pr_info("Using hypercall for remote TLB flush\n"); pv_ops.mmu.flush_tlb_multi = hyperv_flush_tlb_multi; - pv_ops.mmu.tlb_remove_table = tlb_remove_table; } |