diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-19 08:48:09 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-19 08:48:09 -0700 |
commit | 0815d5cc7dfb4a2c6d02a6eb86974ab3992b803d (patch) | |
tree | 2116699a753b2cef5951e29676cbfde423b44003 | |
parent | b3603fcb79b1036acae10602bffc4855a4b9af80 (diff) | |
parent | d277f9d82802223f242cd9b60c988cfdda1d6be0 (diff) | |
download | lwn-0815d5cc7dfb4a2c6d02a6eb86974ab3992b803d.tar.gz lwn-0815d5cc7dfb4a2c6d02a6eb86974ab3992b803d.zip |
Merge tag 'for-linus-6.9-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull xen updates from Juergen Gross:
- Xen event channel handling fix for a regression with a rare kernel
config and some added hardening
- better support of running Xen dom0 in PVH mode
- a cleanup for the xen grant-dma-iommu driver
* tag 'for-linus-6.9-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
xen/events: increment refcnt only if event channel is refcounted
xen/evtchn: avoid WARN() when unbinding an event channel
x86/xen: attempt to inflate the memory balloon on PVH
xen/grant-dma-iommu: Convert to platform remove callback returning void
-rw-r--r-- | arch/x86/include/asm/xen/hypervisor.h | 5 | ||||
-rw-r--r-- | arch/x86/platform/pvh/enlighten.c | 3 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 32 | ||||
-rw-r--r-- | arch/x86/xen/enlighten_pvh.c | 68 | ||||
-rw-r--r-- | arch/x86/xen/setup.c | 44 | ||||
-rw-r--r-- | arch/x86/xen/xen-ops.h | 14 | ||||
-rw-r--r-- | drivers/xen/balloon.c | 2 | ||||
-rw-r--r-- | drivers/xen/events/events_base.c | 22 | ||||
-rw-r--r-- | drivers/xen/evtchn.c | 6 | ||||
-rw-r--r-- | drivers/xen/grant-dma-iommu.c | 6 |
10 files changed, 143 insertions, 59 deletions
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index a9088250770f..64fbd2dbc5b7 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -62,6 +62,11 @@ void xen_arch_unregister_cpu(int num); #ifdef CONFIG_PVH void __init xen_pvh_init(struct boot_params *boot_params); void __init mem_map_via_hcall(struct boot_params *boot_params_p); +#ifdef CONFIG_XEN_PVH +void __init xen_reserve_extra_memory(struct boot_params *bootp); +#else +static inline void xen_reserve_extra_memory(struct boot_params *bootp) { } +#endif #endif /* Lazy mode for batching updates / context switch */ diff --git a/arch/x86/platform/pvh/enlighten.c b/arch/x86/platform/pvh/enlighten.c index 944e0290f2c0..8c2d4b8de25d 100644 --- a/arch/x86/platform/pvh/enlighten.c +++ b/arch/x86/platform/pvh/enlighten.c @@ -75,6 +75,9 @@ static void __init init_pvh_bootparams(bool xen_guest) } else xen_raw_printk("Warning: Can fit ISA range into e820\n"); + if (xen_guest) + xen_reserve_extra_memory(&pvh_bootparams); + pvh_bootparams.hdr.cmd_line_ptr = pvh_start_info.cmdline_paddr; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 3c61bb98c10e..a01ca255b0c6 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -6,6 +6,7 @@ #include <linux/console.h> #include <linux/cpu.h> #include <linux/kexec.h> +#include <linux/memblock.h> #include <linux/slab.h> #include <linux/panic_notifier.h> @@ -350,3 +351,34 @@ void xen_arch_unregister_cpu(int num) } EXPORT_SYMBOL(xen_arch_unregister_cpu); #endif + +/* Amount of extra memory space we add to the e820 ranges */ +struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata; + +void __init xen_add_extra_mem(unsigned long start_pfn, unsigned long n_pfns) +{ + unsigned int i; + + /* + * No need to check for zero size, should happen rarely and will only + * write a new entry regarded to be unused due to zero size. + */ + for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { + /* Add new region. */ + if (xen_extra_mem[i].n_pfns == 0) { + xen_extra_mem[i].start_pfn = start_pfn; + xen_extra_mem[i].n_pfns = n_pfns; + break; + } + /* Append to existing region. */ + if (xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns == + start_pfn) { + xen_extra_mem[i].n_pfns += n_pfns; + break; + } + } + if (i == XEN_EXTRA_MEM_MAX_REGIONS) + printk(KERN_WARNING "Warning: not enough extra memory regions\n"); + + memblock_reserve(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns)); +} diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 9e9db601bd52..27a2a02ef8fb 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/acpi.h> #include <linux/export.h> +#include <linux/mm.h> #include <xen/hvc-console.h> @@ -73,3 +74,70 @@ void __init mem_map_via_hcall(struct boot_params *boot_params_p) } boot_params_p->e820_entries = memmap.nr_entries; } + +/* + * Reserve e820 UNUSABLE regions to inflate the memory balloon. + * + * On PVH dom0 the host memory map is used, RAM regions available to dom0 are + * located as the same place as in the native memory map, but since dom0 gets + * less memory than the total amount of host RAM the ranges that can't be + * populated are converted from RAM -> UNUSABLE. Use such regions (up to the + * ratio signaled in EXTRA_MEM_RATIO) in order to inflate the balloon driver at + * boot. Doing so prevents the guest (even if just temporary) from using holes + * in the memory map in order to map grants or foreign addresses, and + * hopefully limits the risk of a clash with a device MMIO region. Ideally the + * hypervisor should notify us which memory ranges are suitable for creating + * foreign mappings, but that's not yet implemented. + */ +void __init xen_reserve_extra_memory(struct boot_params *bootp) +{ + unsigned int i, ram_pages = 0, extra_pages; + + for (i = 0; i < bootp->e820_entries; i++) { + struct boot_e820_entry *e = &bootp->e820_table[i]; + + if (e->type != E820_TYPE_RAM) + continue; + ram_pages += PFN_DOWN(e->addr + e->size) - PFN_UP(e->addr); + } + + /* Max amount of extra memory. */ + extra_pages = EXTRA_MEM_RATIO * ram_pages; + + /* + * Convert UNUSABLE ranges to RAM and reserve them for foreign mapping + * purposes. + */ + for (i = 0; i < bootp->e820_entries && extra_pages; i++) { + struct boot_e820_entry *e = &bootp->e820_table[i]; + unsigned long pages; + + if (e->type != E820_TYPE_UNUSABLE) + continue; + + pages = min(extra_pages, + PFN_DOWN(e->addr + e->size) - PFN_UP(e->addr)); + + if (pages != (PFN_DOWN(e->addr + e->size) - PFN_UP(e->addr))) { + struct boot_e820_entry *next; + + if (bootp->e820_entries == + ARRAY_SIZE(bootp->e820_table)) + /* No space left to split - skip region. */ + continue; + + /* Split entry. */ + next = e + 1; + memmove(next, e, + (bootp->e820_entries - i) * sizeof(*e)); + bootp->e820_entries++; + next->addr = PAGE_ALIGN(e->addr) + PFN_PHYS(pages); + e->size = next->addr - e->addr; + next->size -= e->size; + } + e->type = E820_TYPE_RAM; + extra_pages -= pages; + + xen_add_extra_mem(PFN_UP(e->addr), pages); + } +} diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index b3e37961065a..380591028cb8 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -38,9 +38,6 @@ #define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024) -/* Amount of extra memory space we add to the e820 ranges */ -struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata; - /* Number of pages released from the initial allocation. */ unsigned long xen_released_pages; @@ -64,18 +61,6 @@ static struct { } xen_remap_buf __initdata __aligned(PAGE_SIZE); static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY; -/* - * The maximum amount of extra memory compared to the base size. The - * main scaling factor is the size of struct page. At extreme ratios - * of base:extra, all the base memory can be filled with page - * structures for the extra memory, leaving no space for anything - * else. - * - * 10x seems like a reasonable balance between scaling flexibility and - * leaving a practically usable system. - */ -#define EXTRA_MEM_RATIO (10) - static bool xen_512gb_limit __initdata = IS_ENABLED(CONFIG_XEN_512GB); static void __init xen_parse_512gb(void) @@ -96,35 +81,6 @@ static void __init xen_parse_512gb(void) xen_512gb_limit = val; } -static void __init xen_add_extra_mem(unsigned long start_pfn, - unsigned long n_pfns) -{ - int i; - - /* - * No need to check for zero size, should happen rarely and will only - * write a new entry regarded to be unused due to zero size. - */ - for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { - /* Add new region. */ - if (xen_extra_mem[i].n_pfns == 0) { - xen_extra_mem[i].start_pfn = start_pfn; - xen_extra_mem[i].n_pfns = n_pfns; - break; - } - /* Append to existing region. */ - if (xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns == - start_pfn) { - xen_extra_mem[i].n_pfns += n_pfns; - break; - } - } - if (i == XEN_EXTRA_MEM_MAX_REGIONS) - printk(KERN_WARNING "Warning: not enough extra memory regions\n"); - - memblock_reserve(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns)); -} - static void __init xen_del_extra_mem(unsigned long start_pfn, unsigned long n_pfns) { diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index a87ab36889e7..79cf93f2c92f 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -163,4 +163,18 @@ void xen_hvm_post_suspend(int suspend_cancelled); static inline void xen_hvm_post_suspend(int suspend_cancelled) {} #endif +/* + * The maximum amount of extra memory compared to the base size. The + * main scaling factor is the size of struct page. At extreme ratios + * of base:extra, all the base memory can be filled with page + * structures for the extra memory, leaving no space for anything + * else. + * + * 10x seems like a reasonable balance between scaling flexibility and + * leaving a practically usable system. + */ +#define EXTRA_MEM_RATIO (10) + +void xen_add_extra_mem(unsigned long start_pfn, unsigned long n_pfns); + #endif /* XEN_OPS_H */ diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 976c6cdf9ee6..aaf2514fcfa4 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -672,7 +672,6 @@ EXPORT_SYMBOL(xen_free_ballooned_pages); static void __init balloon_add_regions(void) { -#if defined(CONFIG_XEN_PV) unsigned long start_pfn, pages; unsigned long pfn, extra_pfn_end; unsigned int i; @@ -696,7 +695,6 @@ static void __init balloon_add_regions(void) balloon_stats.total_pages += extra_pfn_end - start_pfn; } -#endif } static int __init balloon_init(void) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 2faa4bf78c7a..81effbd53dc5 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1190,7 +1190,7 @@ int xen_pirq_from_irq(unsigned irq) EXPORT_SYMBOL_GPL(xen_pirq_from_irq); static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip, - struct xenbus_device *dev) + struct xenbus_device *dev, bool shared) { int ret = -ENOMEM; struct irq_info *info; @@ -1224,7 +1224,8 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip, */ bind_evtchn_to_cpu(info, 0, false); } else if (!WARN_ON(info->type != IRQT_EVTCHN)) { - info->refcnt++; + if (shared && !WARN_ON(info->refcnt < 0)) + info->refcnt++; } ret = info->irq; @@ -1237,13 +1238,13 @@ out: int bind_evtchn_to_irq(evtchn_port_t evtchn) { - return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip, NULL); + return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip, NULL, false); } EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn) { - return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip, NULL); + return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip, NULL, false); } EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi); @@ -1295,7 +1296,8 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev, evtchn_port_t remote_port, - struct irq_chip *chip) + struct irq_chip *chip, + bool shared) { struct evtchn_bind_interdomain bind_interdomain; int err; @@ -1307,14 +1309,14 @@ static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev, &bind_interdomain); return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port, - chip, dev); + chip, dev, shared); } int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev, evtchn_port_t remote_port) { return bind_interdomain_evtchn_to_irq_chip(dev, remote_port, - &xen_lateeoi_chip); + &xen_lateeoi_chip, false); } EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi); @@ -1430,7 +1432,8 @@ static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn, { int irq, retval; - irq = bind_evtchn_to_irq_chip(evtchn, chip, NULL); + irq = bind_evtchn_to_irq_chip(evtchn, chip, NULL, + irqflags & IRQF_SHARED); if (irq < 0) return irq; retval = request_irq(irq, handler, irqflags, devname, dev_id); @@ -1471,7 +1474,8 @@ static int bind_interdomain_evtchn_to_irqhandler_chip( { int irq, retval; - irq = bind_interdomain_evtchn_to_irq_chip(dev, remote_port, chip); + irq = bind_interdomain_evtchn_to_irq_chip(dev, remote_port, chip, + irqflags & IRQF_SHARED); if (irq < 0) return irq; diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 59717628ca42..f6a2216c2c87 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -85,6 +85,7 @@ struct user_evtchn { struct per_user_data *user; evtchn_port_t port; bool enabled; + bool unbinding; }; static void evtchn_free_ring(evtchn_port_t *ring) @@ -164,6 +165,10 @@ static irqreturn_t evtchn_interrupt(int irq, void *data) struct per_user_data *u = evtchn->user; unsigned int prod, cons; + /* Handler might be called when tearing down the IRQ. */ + if (evtchn->unbinding) + return IRQ_HANDLED; + WARN(!evtchn->enabled, "Interrupt for port %u, but apparently not enabled; per-user %p\n", evtchn->port, u); @@ -421,6 +426,7 @@ static void evtchn_unbind_from_user(struct per_user_data *u, BUG_ON(irq < 0); + evtchn->unbinding = true; unbind_from_irqhandler(irq, evtchn); del_evtchn(u, evtchn); diff --git a/drivers/xen/grant-dma-iommu.c b/drivers/xen/grant-dma-iommu.c index 6a9fe02c6bfc..2ee750a03c2f 100644 --- a/drivers/xen/grant-dma-iommu.c +++ b/drivers/xen/grant-dma-iommu.c @@ -51,14 +51,12 @@ static int grant_dma_iommu_probe(struct platform_device *pdev) return 0; } -static int grant_dma_iommu_remove(struct platform_device *pdev) +static void grant_dma_iommu_remove(struct platform_device *pdev) { struct grant_dma_iommu_device *mmu = platform_get_drvdata(pdev); platform_set_drvdata(pdev, NULL); iommu_device_unregister(&mmu->iommu); - - return 0; } static struct platform_driver grant_dma_iommu_driver = { @@ -67,7 +65,7 @@ static struct platform_driver grant_dma_iommu_driver = { .of_match_table = grant_dma_iommu_of_match, }, .probe = grant_dma_iommu_probe, - .remove = grant_dma_iommu_remove, + .remove_new = grant_dma_iommu_remove, }; static int __init grant_dma_iommu_init(void) |