From 1f6277bf716cc5ba0e3fa0c3e0af1adb4160fb5d Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Mon, 20 Mar 2023 00:47:37 -0700 Subject: ACPI: bus: Add stub acpi_sleep_state_supported() in non-ACPI cases acpi_sleep_state_supported() is defined only when CONFIG_ACPI=y. The function is in acpi_bus.h, and acpi_bus.h can only be used in CONFIG_ACPI=y cases. Add the stub function to linux/acpi.h to make compilation successful for !CONFIG_ACPI cases. Signed-off-by: Saurabh Sengar Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/1679298460-11855-3-git-send-email-ssengar@linux.microsoft.com Signed-off-by: Wei Liu --- include/linux/acpi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index efff750f326d..d331f76b0c19 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1075,6 +1075,11 @@ static inline u32 acpi_osc_ctx_get_cxl_control(struct acpi_osc_context *context) return 0; } +static inline bool acpi_sleep_state_supported(u8 sleep_state) +{ + return false; +} + #endif /* !CONFIG_ACPI */ #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC -- cgit v1.2.3 From 0459ff4873739986dccafbb417cfc69e71bdacf4 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 26 Mar 2023 06:52:02 -0700 Subject: swiotlb: Remove bounce buffer remapping for Hyper-V With changes to how Hyper-V guest VMs flip memory between private (encrypted) and shared (decrypted), creating a second kernel virtual mapping for shared memory is no longer necessary. Everything needed for the transition to shared is handled by set_memory_decrypted(). As such, remove swiotlb_unencrypted_base and the associated code. Signed-off-by: Michael Kelley Acked-by: Christoph Hellwig Acked-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/1679838727-87310-8-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- arch/x86/kernel/cpu/mshyperv.c | 7 +------ include/linux/swiotlb.h | 2 -- kernel/dma/swiotlb.c | 45 +----------------------------------------- 3 files changed, 2 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 315fc358e584..ac630ecde795 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -408,12 +407,8 @@ static void __init ms_hyperv_init_platform(void) pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n", ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b); - if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) { + if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) static_branch_enable(&isolation_type_snp); -#ifdef CONFIG_SWIOTLB - swiotlb_unencrypted_base = ms_hyperv.shared_gpa_boundary; -#endif - } } if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) { diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index bcef10e20ea4..2ef25e6fa1b4 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -180,6 +180,4 @@ static inline bool is_swiotlb_for_alloc(struct device *dev) } #endif /* CONFIG_DMA_RESTRICTED_POOL */ -extern phys_addr_t swiotlb_unencrypted_base; - #endif /* __LINUX_SWIOTLB_H */ diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index dac42a2ad588..0fb16bdbbcae 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -73,8 +73,6 @@ static bool swiotlb_force_disable; struct io_tlb_mem io_tlb_default_mem; -phys_addr_t swiotlb_unencrypted_base; - static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT; static unsigned long default_nareas; @@ -201,34 +199,6 @@ static inline unsigned long nr_slots(u64 val) return DIV_ROUND_UP(val, IO_TLB_SIZE); } -/* - * Remap swioltb memory in the unencrypted physical address space - * when swiotlb_unencrypted_base is set. (e.g. for Hyper-V AMD SEV-SNP - * Isolation VMs). - */ -#ifdef CONFIG_HAS_IOMEM -static void *swiotlb_mem_remap(struct io_tlb_mem *mem, unsigned long bytes) -{ - void *vaddr = NULL; - - if (swiotlb_unencrypted_base) { - phys_addr_t paddr = mem->start + swiotlb_unencrypted_base; - - vaddr = memremap(paddr, bytes, MEMREMAP_WB); - if (!vaddr) - pr_err("Failed to map the unencrypted memory %pa size %lx.\n", - &paddr, bytes); - } - - return vaddr; -} -#else -static void *swiotlb_mem_remap(struct io_tlb_mem *mem, unsigned long bytes) -{ - return NULL; -} -#endif - /* * Early SWIOTLB allocation may be too early to allow an architecture to * perform the desired operations. This function allows the architecture to @@ -238,18 +208,12 @@ static void *swiotlb_mem_remap(struct io_tlb_mem *mem, unsigned long bytes) void __init swiotlb_update_mem_attributes(void) { struct io_tlb_mem *mem = &io_tlb_default_mem; - void *vaddr; unsigned long bytes; if (!mem->nslabs || mem->late_alloc) return; - vaddr = phys_to_virt(mem->start); bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT); - set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); - - mem->vaddr = swiotlb_mem_remap(mem, bytes); - if (!mem->vaddr) - mem->vaddr = vaddr; + set_memory_decrypted((unsigned long)mem->vaddr, bytes >> PAGE_SHIFT); } static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start, @@ -280,13 +244,6 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start, mem->slots[i].alloc_size = 0; } - /* - * If swiotlb_unencrypted_base is set, the bounce buffer memory will - * be remapped and cleared in swiotlb_update_mem_attributes. - */ - if (swiotlb_unencrypted_base) - return; - memset(vaddr, 0, bytes); mem->vaddr = vaddr; return; -- cgit v1.2.3 From 25727aaed6514b88f98a18862c6f2d65a0b0ec3b Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 26 Mar 2023 06:52:05 -0700 Subject: hv_netvsc: Remove second mapping of send and recv buffers With changes to how Hyper-V guest VMs flip memory between private (encrypted) and shared (decrypted), creating a second kernel virtual mapping for shared memory is no longer necessary. Everything needed for the transition to shared is handled by set_memory_decrypted(). As such, remove the code to create and manage the second mapping for the pre-allocated send and recv buffers. This mapping is the last user of hv_map_memory()/hv_unmap_memory(), so delete these functions as well. Finally, hv_map_memory() is the last user of vmap_pfn() in Hyper-V guest code, so remove the Kconfig selection of VMAP_PFN. Signed-off-by: Michael Kelley Reviewed-by: Tianyu Lan Link: https://lore.kernel.org/r/1679838727-87310-11-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- arch/x86/hyperv/ivm.c | 28 ------------------------ drivers/hv/Kconfig | 1 - drivers/hv/hv_common.c | 11 ---------- drivers/net/hyperv/hyperv_net.h | 2 -- drivers/net/hyperv/netvsc.c | 48 ++--------------------------------------- include/asm-generic/mshyperv.h | 2 -- 6 files changed, 2 insertions(+), 90 deletions(-) (limited to 'include') diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index f6a020cb1a24..127d5b7b63de 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -376,34 +376,6 @@ void __init hv_vtom_init(void) #endif /* CONFIG_AMD_MEM_ENCRYPT */ -/* - * hv_map_memory - map memory to extra space in the AMD SEV-SNP Isolation VM. - */ -void *hv_map_memory(void *addr, unsigned long size) -{ - unsigned long *pfns = kcalloc(size / PAGE_SIZE, - sizeof(unsigned long), GFP_KERNEL); - void *vaddr; - int i; - - if (!pfns) - return NULL; - - for (i = 0; i < size / PAGE_SIZE; i++) - pfns[i] = vmalloc_to_pfn(addr + i * PAGE_SIZE) + - (ms_hyperv.shared_gpa_boundary >> PAGE_SHIFT); - - vaddr = vmap_pfn(pfns, size / PAGE_SIZE, pgprot_decrypted(PAGE_KERNEL)); - kfree(pfns); - - return vaddr; -} - -void hv_unmap_memory(void *addr) -{ - vunmap(addr); -} - enum hv_isolation_type hv_get_isolation_type(void) { if (!(ms_hyperv.priv_high & HV_ISOLATION)) diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index 47132b30b7ee..94982f08b661 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -8,7 +8,6 @@ config HYPERV || (ACPI && ARM64 && !CPU_BIG_ENDIAN) select PARAVIRT select X86_HV_CALLBACK_VECTOR if X86 - select VMAP_PFN select OF_EARLY_FLATTREE if OF help Select this option to run Linux as a Hyper-V client operating diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 52a6f89ccdbd..6d40b6c7b23b 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -311,14 +311,3 @@ u64 __weak hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_s return HV_STATUS_INVALID_PARAMETER; } EXPORT_SYMBOL_GPL(hv_ghcb_hypercall); - -void __weak *hv_map_memory(void *addr, unsigned long size) -{ - return NULL; -} -EXPORT_SYMBOL_GPL(hv_map_memory); - -void __weak hv_unmap_memory(void *addr) -{ -} -EXPORT_SYMBOL_GPL(hv_unmap_memory); diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index dd5919ec408b..33d51e363913 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -1139,7 +1139,6 @@ struct netvsc_device { /* Receive buffer allocated by us but manages by NetVSP */ void *recv_buf; - void *recv_original_buf; u32 recv_buf_size; /* allocated bytes */ struct vmbus_gpadl recv_buf_gpadl_handle; u32 recv_section_cnt; @@ -1148,7 +1147,6 @@ struct netvsc_device { /* Send buffer allocated by us */ void *send_buf; - void *send_original_buf; u32 send_buf_size; struct vmbus_gpadl send_buf_gpadl_handle; u32 send_section_cnt; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index da737d959e81..82e9796c8f5e 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -154,17 +154,8 @@ static void free_netvsc_device(struct rcu_head *head) int i; kfree(nvdev->extension); - - if (nvdev->recv_original_buf) - vfree(nvdev->recv_original_buf); - else - vfree(nvdev->recv_buf); - - if (nvdev->send_original_buf) - vfree(nvdev->send_original_buf); - else - vfree(nvdev->send_buf); - + vfree(nvdev->recv_buf); + vfree(nvdev->send_buf); bitmap_free(nvdev->send_section_map); for (i = 0; i < VRSS_CHANNEL_MAX; i++) { @@ -347,7 +338,6 @@ static int netvsc_init_buf(struct hv_device *device, struct nvsp_message *init_packet; unsigned int buf_size; int i, ret = 0; - void *vaddr; /* Get receive buffer area. */ buf_size = device_info->recv_sections * device_info->recv_section_size; @@ -383,17 +373,6 @@ static int netvsc_init_buf(struct hv_device *device, goto cleanup; } - if (hv_isolation_type_snp()) { - vaddr = hv_map_memory(net_device->recv_buf, buf_size); - if (!vaddr) { - ret = -ENOMEM; - goto cleanup; - } - - net_device->recv_original_buf = net_device->recv_buf; - net_device->recv_buf = vaddr; - } - /* Notify the NetVsp of the gpadl handle */ init_packet = &net_device->channel_init_pkt; memset(init_packet, 0, sizeof(struct nvsp_message)); @@ -497,17 +476,6 @@ static int netvsc_init_buf(struct hv_device *device, goto cleanup; } - if (hv_isolation_type_snp()) { - vaddr = hv_map_memory(net_device->send_buf, buf_size); - if (!vaddr) { - ret = -ENOMEM; - goto cleanup; - } - - net_device->send_original_buf = net_device->send_buf; - net_device->send_buf = vaddr; - } - /* Notify the NetVsp of the gpadl handle */ init_packet = &net_device->channel_init_pkt; memset(init_packet, 0, sizeof(struct nvsp_message)); @@ -762,12 +730,6 @@ void netvsc_device_remove(struct hv_device *device) netvsc_teardown_send_gpadl(device, net_device, ndev); } - if (net_device->recv_original_buf) - hv_unmap_memory(net_device->recv_buf); - - if (net_device->send_original_buf) - hv_unmap_memory(net_device->send_buf); - /* Release all resources */ free_netvsc_device_rcu(net_device); } @@ -1844,12 +1806,6 @@ cleanup: netif_napi_del(&net_device->chan_table[0].napi); cleanup2: - if (net_device->recv_original_buf) - hv_unmap_memory(net_device->recv_buf); - - if (net_device->send_original_buf) - hv_unmap_memory(net_device->send_buf); - free_netvsc_device(&net_device->rcu); return ERR_PTR(ret); diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index 90d7f68ed39d..afcd9ae9588c 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -271,8 +271,6 @@ u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size); void hyperv_cleanup(void); bool hv_query_ext_cap(u64 cap_query); void hv_setup_dma_ops(struct device *dev, bool coherent); -void *hv_map_memory(void *addr, unsigned long size); -void hv_unmap_memory(void *addr); #else /* CONFIG_HYPERV */ static inline bool hv_is_hyperv_initialized(void) { return false; } static inline bool hv_is_hibernation_supported(void) { return false; } -- cgit v1.2.3 From 2c6ba4216844ca7918289b49ed5f3f7138ee2402 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 26 Mar 2023 06:52:07 -0700 Subject: PCI: hv: Enable PCI pass-thru devices in Confidential VMs For PCI pass-thru devices in a Confidential VM, Hyper-V requires that PCI config space be accessed via hypercalls. In normal VMs, config space accesses are trapped to the Hyper-V host and emulated. But in a confidential VM, the host can't access guest memory to decode the instruction for emulation, so an explicit hypercall must be used. Add functions to make the new MMIO read and MMIO write hypercalls. Update the PCI config space access functions to use the hypercalls when such use is indicated by Hyper-V flags. Also, set the flag to allow the Hyper-V PCI driver to be loaded and used in a Confidential VM (a.k.a., "Isolation VM"). The driver has previously been hardened against a malicious Hyper-V host[1]. [1] https://lore.kernel.org/all/20220511223207.3386-2-parri.andrea@gmail.com/ Co-developed-by: Dexuan Cui Signed-off-by: Dexuan Cui Signed-off-by: Michael Kelley Reviewed-by: Boqun Feng Reviewed-by: Haiyang Zhang Link: https://lore.kernel.org/r/1679838727-87310-13-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- arch/x86/include/asm/hyperv-tlfs.h | 3 + drivers/hv/channel_mgmt.c | 2 +- drivers/pci/controller/pci-hyperv.c | 232 ++++++++++++++++++++++++++---------- include/asm-generic/hyperv-tlfs.h | 22 ++++ 4 files changed, 194 insertions(+), 65 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 0b73a809e9e1..b4fb75bd1013 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -122,6 +122,9 @@ /* Recommend using enlightened VMCS */ #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14) +/* Use hypercalls for MMIO config space access */ +#define HV_X64_USE_MMIO_HYPERCALLS BIT(21) + /* * CPU management features identification. * These are HYPERV_CPUID_CPU_MANAGEMENT_FEATURES.EAX bits. diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index cc23b90cae02..007f26d5f1a4 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -67,7 +67,7 @@ const struct vmbus_device vmbus_devs[] = { { .dev_type = HV_PCIE, HV_PCIE_GUID, .perf_device = false, - .allowed_in_isolated = false, + .allowed_in_isolated = true, }, /* Synthetic Frame Buffer */ diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index f33370b75628..337f3b4a04fc 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -514,6 +514,7 @@ struct hv_pcibus_device { /* Highest slot of child device with resources allocated */ int wslot_res_allocated; + bool use_calls; /* Use hypercalls to access mmio cfg space */ /* hypercall arg, must not cross page boundary */ struct hv_retarget_device_interrupt retarget_msi_interrupt_params; @@ -1041,6 +1042,70 @@ static int wslot_to_devfn(u32 wslot) return PCI_DEVFN(slot_no.bits.dev, slot_no.bits.func); } +static void hv_pci_read_mmio(struct device *dev, phys_addr_t gpa, int size, u32 *val) +{ + struct hv_mmio_read_input *in; + struct hv_mmio_read_output *out; + u64 ret; + + /* + * Must be called with interrupts disabled so it is safe + * to use the per-cpu input argument page. Use it for + * both input and output. + */ + in = *this_cpu_ptr(hyperv_pcpu_input_arg); + out = *this_cpu_ptr(hyperv_pcpu_input_arg) + sizeof(*in); + in->gpa = gpa; + in->size = size; + + ret = hv_do_hypercall(HVCALL_MMIO_READ, in, out); + if (hv_result_success(ret)) { + switch (size) { + case 1: + *val = *(u8 *)(out->data); + break; + case 2: + *val = *(u16 *)(out->data); + break; + default: + *val = *(u32 *)(out->data); + break; + } + } else + dev_err(dev, "MMIO read hypercall error %llx addr %llx size %d\n", + ret, gpa, size); +} + +static void hv_pci_write_mmio(struct device *dev, phys_addr_t gpa, int size, u32 val) +{ + struct hv_mmio_write_input *in; + u64 ret; + + /* + * Must be called with interrupts disabled so it is safe + * to use the per-cpu input argument memory. + */ + in = *this_cpu_ptr(hyperv_pcpu_input_arg); + in->gpa = gpa; + in->size = size; + switch (size) { + case 1: + *(u8 *)(in->data) = val; + break; + case 2: + *(u16 *)(in->data) = val; + break; + default: + *(u32 *)(in->data) = val; + break; + } + + ret = hv_do_hypercall(HVCALL_MMIO_WRITE, in, NULL); + if (!hv_result_success(ret)) + dev_err(dev, "MMIO write hypercall error %llx addr %llx size %d\n", + ret, gpa, size); +} + /* * PCI Configuration Space for these root PCI buses is implemented as a pair * of pages in memory-mapped I/O space. Writing to the first page chooses @@ -1059,8 +1124,10 @@ static int wslot_to_devfn(u32 wslot) static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where, int size, u32 *val) { + struct hv_pcibus_device *hbus = hpdev->hbus; + struct device *dev = &hbus->hdev->device; + int offset = where + CFG_PAGE_OFFSET; unsigned long flags; - void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET + where; /* * If the attempt is to read the IDs or the ROM BAR, simulate that. @@ -1088,56 +1155,79 @@ static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where, */ *val = 0; } else if (where + size <= CFG_PAGE_SIZE) { - spin_lock_irqsave(&hpdev->hbus->config_lock, flags); - /* Choose the function to be read. (See comment above) */ - writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr); - /* Make sure the function was chosen before we start reading. */ - mb(); - /* Read from that function's config space. */ - switch (size) { - case 1: - *val = readb(addr); - break; - case 2: - *val = readw(addr); - break; - default: - *val = readl(addr); - break; + + spin_lock_irqsave(&hbus->config_lock, flags); + if (hbus->use_calls) { + phys_addr_t addr = hbus->mem_config->start + offset; + + hv_pci_write_mmio(dev, hbus->mem_config->start, 4, + hpdev->desc.win_slot.slot); + hv_pci_read_mmio(dev, addr, size, val); + } else { + void __iomem *addr = hbus->cfg_addr + offset; + + /* Choose the function to be read. (See comment above) */ + writel(hpdev->desc.win_slot.slot, hbus->cfg_addr); + /* Make sure the function was chosen before reading. */ + mb(); + /* Read from that function's config space. */ + switch (size) { + case 1: + *val = readb(addr); + break; + case 2: + *val = readw(addr); + break; + default: + *val = readl(addr); + break; + } + /* + * Make sure the read was done before we release the + * spinlock allowing consecutive reads/writes. + */ + mb(); } - /* - * Make sure the read was done before we release the spinlock - * allowing consecutive reads/writes. - */ - mb(); - spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags); + spin_unlock_irqrestore(&hbus->config_lock, flags); } else { - dev_err(&hpdev->hbus->hdev->device, - "Attempt to read beyond a function's config space.\n"); + dev_err(dev, "Attempt to read beyond a function's config space.\n"); } } static u16 hv_pcifront_get_vendor_id(struct hv_pci_dev *hpdev) { + struct hv_pcibus_device *hbus = hpdev->hbus; + struct device *dev = &hbus->hdev->device; + u32 val; u16 ret; unsigned long flags; - void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET + - PCI_VENDOR_ID; - spin_lock_irqsave(&hpdev->hbus->config_lock, flags); + spin_lock_irqsave(&hbus->config_lock, flags); - /* Choose the function to be read. (See comment above) */ - writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr); - /* Make sure the function was chosen before we start reading. */ - mb(); - /* Read from that function's config space. */ - ret = readw(addr); - /* - * mb() is not required here, because the spin_unlock_irqrestore() - * is a barrier. - */ + if (hbus->use_calls) { + phys_addr_t addr = hbus->mem_config->start + + CFG_PAGE_OFFSET + PCI_VENDOR_ID; + + hv_pci_write_mmio(dev, hbus->mem_config->start, 4, + hpdev->desc.win_slot.slot); + hv_pci_read_mmio(dev, addr, 2, &val); + ret = val; /* Truncates to 16 bits */ + } else { + void __iomem *addr = hbus->cfg_addr + CFG_PAGE_OFFSET + + PCI_VENDOR_ID; + /* Choose the function to be read. (See comment above) */ + writel(hpdev->desc.win_slot.slot, hbus->cfg_addr); + /* Make sure the function was chosen before we start reading. */ + mb(); + /* Read from that function's config space. */ + ret = readw(addr); + /* + * mb() is not required here, because the + * spin_unlock_irqrestore() is a barrier. + */ + } - spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags); + spin_unlock_irqrestore(&hbus->config_lock, flags); return ret; } @@ -1152,39 +1242,51 @@ static u16 hv_pcifront_get_vendor_id(struct hv_pci_dev *hpdev) static void _hv_pcifront_write_config(struct hv_pci_dev *hpdev, int where, int size, u32 val) { + struct hv_pcibus_device *hbus = hpdev->hbus; + struct device *dev = &hbus->hdev->device; + int offset = where + CFG_PAGE_OFFSET; unsigned long flags; - void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET + where; if (where >= PCI_SUBSYSTEM_VENDOR_ID && where + size <= PCI_CAPABILITY_LIST) { /* SSIDs and ROM BARs are read-only */ } else if (where >= PCI_COMMAND && where + size <= CFG_PAGE_SIZE) { - spin_lock_irqsave(&hpdev->hbus->config_lock, flags); - /* Choose the function to be written. (See comment above) */ - writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr); - /* Make sure the function was chosen before we start writing. */ - wmb(); - /* Write to that function's config space. */ - switch (size) { - case 1: - writeb(val, addr); - break; - case 2: - writew(val, addr); - break; - default: - writel(val, addr); - break; + spin_lock_irqsave(&hbus->config_lock, flags); + + if (hbus->use_calls) { + phys_addr_t addr = hbus->mem_config->start + offset; + + hv_pci_write_mmio(dev, hbus->mem_config->start, 4, + hpdev->desc.win_slot.slot); + hv_pci_write_mmio(dev, addr, size, val); + } else { + void __iomem *addr = hbus->cfg_addr + offset; + + /* Choose the function to write. (See comment above) */ + writel(hpdev->desc.win_slot.slot, hbus->cfg_addr); + /* Make sure the function was chosen before writing. */ + wmb(); + /* Write to that function's config space. */ + switch (size) { + case 1: + writeb(val, addr); + break; + case 2: + writew(val, addr); + break; + default: + writel(val, addr); + break; + } + /* + * Make sure the write was done before we release the + * spinlock allowing consecutive reads/writes. + */ + mb(); } - /* - * Make sure the write was done before we release the spinlock - * allowing consecutive reads/writes. - */ - mb(); - spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags); + spin_unlock_irqrestore(&hbus->config_lock, flags); } else { - dev_err(&hpdev->hbus->hdev->device, - "Attempt to write beyond a function's config space.\n"); + dev_err(dev, "Attempt to write beyond a function's config space.\n"); } } @@ -3563,6 +3665,7 @@ static int hv_pci_probe(struct hv_device *hdev, hbus->bridge->domain_nr = dom; #ifdef CONFIG_X86 hbus->sysdata.domain = dom; + hbus->use_calls = !!(ms_hyperv.hints & HV_X64_USE_MMIO_HYPERCALLS); #elif defined(CONFIG_ARM64) /* * Set the PCI bus parent to be the corresponding VMbus @@ -3572,6 +3675,7 @@ static int hv_pci_probe(struct hv_device *hdev, * information to devices created on the bus. */ hbus->sysdata.parent = hdev->device.parent; + hbus->use_calls = false; #endif hbus->hdev = hdev; diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h index b870983596b9..ea406e901469 100644 --- a/include/asm-generic/hyperv-tlfs.h +++ b/include/asm-generic/hyperv-tlfs.h @@ -168,6 +168,8 @@ union hv_reference_tsc_msr { #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 #define HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY 0x00db +#define HVCALL_MMIO_READ 0x0106 +#define HVCALL_MMIO_WRITE 0x0107 /* Extended hypercalls */ #define HV_EXT_CALL_QUERY_CAPABILITIES 0x8001 @@ -796,4 +798,24 @@ struct hv_memory_hint { union hv_gpa_page_range ranges[]; } __packed; +/* Data structures for HVCALL_MMIO_READ and HVCALL_MMIO_WRITE */ +#define HV_HYPERCALL_MMIO_MAX_DATA_LENGTH 64 + +struct hv_mmio_read_input { + u64 gpa; + u32 size; + u32 reserved; +} __packed; + +struct hv_mmio_read_output { + u8 data[HV_HYPERCALL_MMIO_MAX_DATA_LENGTH]; +} __packed; + +struct hv_mmio_write_input { + u64 gpa; + u32 size; + u32 reserved; + u8 data[HV_HYPERCALL_MMIO_MAX_DATA_LENGTH]; +} __packed; + #endif -- cgit v1.2.3 From d7b6ba9611aefc4bef207b16db8ff06b726efe35 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 27 Mar 2023 06:16:06 -0700 Subject: x86/hyperv: Add callback filter to cpumask_to_vpset() When copying CPUs from a Linux cpumask to a Hyper-V VPset, cpumask_to_vpset() currently has a "_noself" variant that doesn't copy the current CPU to the VPset. Generalize this variant by replacing it with a "_skip" variant having a callback function that is invoked for each CPU to decide if that CPU should be copied. Update the one caller of cpumask_to_vpset_noself() to use the new "_skip" variant instead. No functional change. Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/1679922967-26582-2-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- arch/x86/hyperv/hv_apic.c | 12 ++++++++---- include/asm-generic/mshyperv.h | 22 ++++++++++++++-------- 2 files changed, 22 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index fb8b2c088681..1fbda2f94184 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -96,6 +96,11 @@ static void hv_apic_eoi_write(u32 reg, u32 val) wrmsr(HV_X64_MSR_EOI, val, 0); } +static bool cpu_is_self(int cpu) +{ + return cpu == smp_processor_id(); +} + /* * IPI implementation on Hyper-V. */ @@ -128,10 +133,9 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector, */ if (!cpumask_equal(mask, cpu_present_mask) || exclude_self) { ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K; - if (exclude_self) - nr_bank = cpumask_to_vpset_noself(&(ipi_arg->vp_set), mask); - else - nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask); + + nr_bank = cpumask_to_vpset_skip(&(ipi_arg->vp_set), mask, + exclude_self ? cpu_is_self : NULL); /* * 'nr_bank <= 0' means some CPUs in cpumask can't be diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index afcd9ae9588c..402a8c1c202d 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -210,10 +210,9 @@ static inline int hv_cpu_number_to_vp_number(int cpu_number) static inline int __cpumask_to_vpset(struct hv_vpset *vpset, const struct cpumask *cpus, - bool exclude_self) + bool (*func)(int cpu)) { int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1; - int this_cpu = smp_processor_id(); int max_vcpu_bank = hv_max_vp_index / HV_VCPUS_PER_SPARSE_BANK; /* vpset.valid_bank_mask can represent up to HV_MAX_SPARSE_VCPU_BANKS banks */ @@ -232,7 +231,7 @@ static inline int __cpumask_to_vpset(struct hv_vpset *vpset, * Some banks may end up being empty but this is acceptable. */ for_each_cpu(cpu, cpus) { - if (exclude_self && cpu == this_cpu) + if (func && func(cpu)) continue; vcpu = hv_cpu_number_to_vp_number(cpu); if (vcpu == VP_INVAL) @@ -248,17 +247,24 @@ static inline int __cpumask_to_vpset(struct hv_vpset *vpset, return nr_bank; } +/* + * Convert a Linux cpumask into a Hyper-V VPset. In the _skip variant, + * 'func' is called for each CPU present in cpumask. If 'func' returns + * true, that CPU is skipped -- i.e., that CPU from cpumask is *not* + * added to the Hyper-V VPset. If 'func' is NULL, no CPUs are + * skipped. + */ static inline int cpumask_to_vpset(struct hv_vpset *vpset, const struct cpumask *cpus) { - return __cpumask_to_vpset(vpset, cpus, false); + return __cpumask_to_vpset(vpset, cpus, NULL); } -static inline int cpumask_to_vpset_noself(struct hv_vpset *vpset, - const struct cpumask *cpus) +static inline int cpumask_to_vpset_skip(struct hv_vpset *vpset, + const struct cpumask *cpus, + bool (*func)(int cpu)) { - WARN_ON_ONCE(preemptible()); - return __cpumask_to_vpset(vpset, cpus, true); + return __cpumask_to_vpset(vpset, cpus, func); } void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die); -- cgit v1.2.3 From c26e0527aaf84a34b4774d80c9a9baa65f4d77f2 Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Mon, 10 Apr 2023 22:55:29 -0700 Subject: x86/hyperv: Add VTL specific structs and hypercalls Add structs and hypercalls required to enable VTL support on x86. Signed-off-by: Saurabh Sengar Reviewed-by: Michael Kelley Reviewed-by: Stanislav Kinsburskii Link: https://lore.kernel.org/r/1681192532-15460-3-git-send-email-ssengar@linux.microsoft.com Signed-off-by: Wei Liu --- arch/x86/include/asm/hyperv-tlfs.h | 75 ++++++++++++++++++++++++++++++++++++++ include/asm-generic/hyperv-tlfs.h | 4 ++ 2 files changed, 79 insertions(+) (limited to 'include') diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index b4fb75bd1013..cea95dcd27c2 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -716,6 +716,81 @@ union hv_msi_entry { } __packed; }; +struct hv_x64_segment_register { + u64 base; + u32 limit; + u16 selector; + union { + struct { + u16 segment_type : 4; + u16 non_system_segment : 1; + u16 descriptor_privilege_level : 2; + u16 present : 1; + u16 reserved : 4; + u16 available : 1; + u16 _long : 1; + u16 _default : 1; + u16 granularity : 1; + } __packed; + u16 attributes; + }; +} __packed; + +struct hv_x64_table_register { + u16 pad[3]; + u16 limit; + u64 base; +} __packed; + +struct hv_init_vp_context { + u64 rip; + u64 rsp; + u64 rflags; + + struct hv_x64_segment_register cs; + struct hv_x64_segment_register ds; + struct hv_x64_segment_register es; + struct hv_x64_segment_register fs; + struct hv_x64_segment_register gs; + struct hv_x64_segment_register ss; + struct hv_x64_segment_register tr; + struct hv_x64_segment_register ldtr; + + struct hv_x64_table_register idtr; + struct hv_x64_table_register gdtr; + + u64 efer; + u64 cr0; + u64 cr3; + u64 cr4; + u64 msr_cr_pat; +} __packed; + +union hv_input_vtl { + u8 as_uint8; + struct { + u8 target_vtl: 4; + u8 use_target_vtl: 1; + u8 reserved_z: 3; + }; +} __packed; + +struct hv_enable_vp_vtl { + u64 partition_id; + u32 vp_index; + union hv_input_vtl target_vtl; + u8 mbz0; + u16 mbz1; + struct hv_init_vp_context vp_context; +} __packed; + +struct hv_get_vp_from_apic_id_in { + u64 partition_id; + union hv_input_vtl target_vtl; + u8 res[7]; + u32 apic_ids[]; +} __packed; + #include #endif diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h index ea406e901469..f4e4cc4f965f 100644 --- a/include/asm-generic/hyperv-tlfs.h +++ b/include/asm-generic/hyperv-tlfs.h @@ -146,6 +146,7 @@ union hv_reference_tsc_msr { /* Declare the various hypercall operations. */ #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003 +#define HVCALL_ENABLE_VP_VTL 0x000f #define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008 #define HVCALL_SEND_IPI 0x000b #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013 @@ -165,6 +166,8 @@ union hv_reference_tsc_msr { #define HVCALL_MAP_DEVICE_INTERRUPT 0x007c #define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d #define HVCALL_RETARGET_INTERRUPT 0x007e +#define HVCALL_START_VP 0x0099 +#define HVCALL_GET_VP_ID_FROM_APIC_ID 0x009a #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 #define HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY 0x00db @@ -220,6 +223,7 @@ enum HV_GENERIC_SET_FORMAT { #define HV_STATUS_INVALID_PORT_ID 17 #define HV_STATUS_INVALID_CONNECTION_ID 18 #define HV_STATUS_INSUFFICIENT_BUFFERS 19 +#define HV_STATUS_VTL_ALREADY_ENABLED 134 /* * The Hyper-V TimeRefCount register and the TSC -- cgit v1.2.3