From 1f7df3a691740a7736bbc99dc4ed536120eb4746 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 19 Feb 2025 17:31:36 -0800 Subject: genirq/msi: Store the IOMMU IOVA directly in msi_desc instead of iommu_cookie The IOMMU translation for MSI message addresses has been a 2-step process, separated in time: 1) iommu_dma_prepare_msi(): A cookie pointer containing the IOVA address is stored in the MSI descriptor when an MSI interrupt is allocated. 2) iommu_dma_compose_msi_msg(): this cookie pointer is used to compute a translated message address. This has an inherent lifetime problem for the pointer stored in the cookie that must remain valid between the two steps. However, there is no locking at the irq layer that helps protect the lifetime. Today, this works under the assumption that the iommu domain is not changed while MSI interrupts being programmed. This is true for normal DMA API users within the kernel, as the iommu domain is attached before the driver is probed and cannot be changed while a driver is attached. Classic VFIO type1 also prevented changing the iommu domain while VFIO was running as it does not support changing the "container" after starting up. However, iommufd has improved this so that the iommu domain can be changed during VFIO operation. This potentially allows userspace to directly race VFIO_DEVICE_ATTACH_IOMMUFD_PT (which calls iommu_attach_group()) and VFIO_DEVICE_SET_IRQS (which calls into iommu_dma_compose_msi_msg()). This potentially causes both the cookie pointer and the unlocked call to iommu_get_domain_for_dev() on the MSI translation path to become UAFs. Fix the MSI cookie UAF by removing the cookie pointer. The translated IOVA address is already known during iommu_dma_prepare_msi() and cannot change. Thus, it can simply be stored as an integer in the MSI descriptor. The other UAF related to iommu_get_domain_for_dev() will be addressed in patch "iommu: Make iommu_dma_prepare_msi() into a generic operation" by using the IOMMU group mutex. Link: https://patch.msgid.link/r/a4f2cd76b9dc1833ee6c1cf325cba57def22231c.1740014950.git.nicolinc@nvidia.com Signed-off-by: Nicolin Chen Reviewed-by: Thomas Gleixner Signed-off-by: Jason Gunthorpe --- include/linux/msi.h | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) (limited to 'include/linux/msi.h') diff --git a/include/linux/msi.h b/include/linux/msi.h index b10093c4d00e..fc4f3774c3af 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -166,6 +166,10 @@ struct msi_desc_data { * @dev: Pointer to the device which uses this descriptor * @msg: The last set MSI message cached for reuse * @affinity: Optional pointer to a cpu affinity mask for this descriptor + * @iommu_msi_iova: Optional shifted IOVA from the IOMMU to override the msi_addr. + * Only used if iommu_msi_shift != 0 + * @iommu_msi_shift: Indicates how many bits of the original address should be + * preserved when using iommu_msi_iova. * @sysfs_attr: Pointer to sysfs device attribute * * @write_msi_msg: Callback that may be called when the MSI message @@ -184,7 +188,8 @@ struct msi_desc { struct msi_msg msg; struct irq_affinity_desc *affinity; #ifdef CONFIG_IRQ_MSI_IOMMU - const void *iommu_cookie; + u64 iommu_msi_iova : 58; + u64 iommu_msi_shift : 6; #endif #ifdef CONFIG_SYSFS struct device_attribute *sysfs_attrs; @@ -285,28 +290,14 @@ struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid, #define msi_desc_to_dev(desc) ((desc)->dev) -#ifdef CONFIG_IRQ_MSI_IOMMU -static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc) -{ - return desc->iommu_cookie; -} - -static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc, - const void *iommu_cookie) -{ - desc->iommu_cookie = iommu_cookie; -} -#else -static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc) +static inline void msi_desc_set_iommu_msi_iova(struct msi_desc *desc, u64 msi_iova, + unsigned int msi_shift) { - return NULL; -} - -static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc, - const void *iommu_cookie) -{ -} +#ifdef CONFIG_IRQ_MSI_IOMMU + desc->iommu_msi_iova = msi_iova >> msi_shift; + desc->iommu_msi_shift = msi_shift; #endif +} int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid, struct msi_desc *init_desc); -- cgit v1.2.3 From 9349887e93009331e751854843f73a086bef4018 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 19 Feb 2025 17:31:37 -0800 Subject: genirq/msi: Refactor iommu_dma_compose_msi_msg() The two-step process to translate the MSI address involves two functions, iommu_dma_prepare_msi() and iommu_dma_compose_msi_msg(). Previously iommu_dma_compose_msi_msg() needed to be in the iommu layer as it had to dereference the opaque cookie pointer. Now, the previous patch changed the cookie pointer into an integer so there is no longer any need for the iommu layer to be involved. Further, the call sites of iommu_dma_compose_msi_msg() all follow the same pattern of setting an MSI message address_hi/lo to non-translated and then immediately calling iommu_dma_compose_msi_msg(). Refactor iommu_dma_compose_msi_msg() into msi_msg_set_addr() that directly accepts the u64 version of the address and simplifies all the callers. Move the new helper to linux/msi.h since it has nothing to do with iommu. Aside from refactoring, this logically prepares for the next patch, which allows multiple implementation options for iommu_dma_prepare_msi(). So, it does not make sense to have the iommu_dma_compose_msi_msg() in dma-iommu.c as it no longer provides the only iommu_dma_prepare_msi() implementation. Link: https://patch.msgid.link/r/eda62a9bafa825e9cdabd7ddc61ad5a21c32af24.1740014950.git.nicolinc@nvidia.com Signed-off-by: Nicolin Chen Reviewed-by: Thomas Gleixner Signed-off-by: Jason Gunthorpe --- drivers/iommu/dma-iommu.c | 18 ------------------ drivers/irqchip/irq-gic-v2m.c | 5 +---- drivers/irqchip/irq-gic-v3-its.c | 13 +++---------- drivers/irqchip/irq-gic-v3-mbi.c | 12 ++++-------- drivers/irqchip/irq-ls-scfg-msi.c | 5 ++--- include/linux/iommu.h | 6 ------ include/linux/msi.h | 28 ++++++++++++++++++++++++++++ 7 files changed, 38 insertions(+), 49 deletions(-) (limited to 'include/linux/msi.h') diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 0f0caf59023c..bf91e014d179 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1836,24 +1836,6 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) return 0; } -/** - * iommu_dma_compose_msi_msg() - Apply translation to an MSI message - * @desc: MSI descriptor prepared by iommu_dma_prepare_msi() - * @msg: MSI message containing target physical address - */ -void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg) -{ -#ifdef CONFIG_IRQ_MSI_IOMMU - if (desc->iommu_msi_shift) { - u64 msi_iova = desc->iommu_msi_iova << desc->iommu_msi_shift; - - msg->address_hi = upper_32_bits(msi_iova); - msg->address_lo = lower_32_bits(msi_iova) | - (msg->address_lo & ((1 << desc->iommu_msi_shift) - 1)); - } -#endif -} - static int iommu_dma_init(void) { if (is_kdump_kernel()) diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index be35c5349986..57e0470e0d13 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -87,9 +87,6 @@ static void gicv2m_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) struct v2m_data *v2m = irq_data_get_irq_chip_data(data); phys_addr_t addr = gicv2m_get_msi_addr(v2m, data->hwirq); - msg->address_hi = upper_32_bits(addr); - msg->address_lo = lower_32_bits(addr); - if (v2m->flags & GICV2M_GRAVITON_ADDRESS_ONLY) msg->data = 0; else @@ -97,7 +94,7 @@ static void gicv2m_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) if (v2m->flags & GICV2M_NEEDS_SPI_OFFSET) msg->data -= v2m->spi_offset; - iommu_dma_compose_msi_msg(irq_data_get_msi_desc(data), msg); + msi_msg_set_addr(irq_data_get_msi_desc(data), msg, addr); } static struct irq_chip gicv2m_irq_chip = { diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 8c3ec5734f1e..ce0bf70b9eaf 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1809,17 +1809,10 @@ static u64 its_irq_get_msi_base(struct its_device *its_dev) static void its_irq_compose_msi_msg(struct irq_data *d, struct msi_msg *msg) { struct its_device *its_dev = irq_data_get_irq_chip_data(d); - struct its_node *its; - u64 addr; - - its = its_dev->its; - addr = its->get_msi_base(its_dev); - - msg->address_lo = lower_32_bits(addr); - msg->address_hi = upper_32_bits(addr); - msg->data = its_get_event_id(d); - iommu_dma_compose_msi_msg(irq_data_get_msi_desc(d), msg); + msg->data = its_get_event_id(d); + msi_msg_set_addr(irq_data_get_msi_desc(d), msg, + its_dev->its->get_msi_base(its_dev)); } static int its_irq_set_irqchip_state(struct irq_data *d, diff --git a/drivers/irqchip/irq-gic-v3-mbi.c b/drivers/irqchip/irq-gic-v3-mbi.c index 3fe870f8ee17..a6510128611e 100644 --- a/drivers/irqchip/irq-gic-v3-mbi.c +++ b/drivers/irqchip/irq-gic-v3-mbi.c @@ -147,22 +147,18 @@ static const struct irq_domain_ops mbi_domain_ops = { static void mbi_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) { - msg[0].address_hi = upper_32_bits(mbi_phys_base + GICD_SETSPI_NSR); - msg[0].address_lo = lower_32_bits(mbi_phys_base + GICD_SETSPI_NSR); msg[0].data = data->parent_data->hwirq; - - iommu_dma_compose_msi_msg(irq_data_get_msi_desc(data), msg); + msi_msg_set_addr(irq_data_get_msi_desc(data), &msg[0], + mbi_phys_base + GICD_SETSPI_NSR); } static void mbi_compose_mbi_msg(struct irq_data *data, struct msi_msg *msg) { mbi_compose_msi_msg(data, msg); - msg[1].address_hi = upper_32_bits(mbi_phys_base + GICD_CLRSPI_NSR); - msg[1].address_lo = lower_32_bits(mbi_phys_base + GICD_CLRSPI_NSR); msg[1].data = data->parent_data->hwirq; - - iommu_dma_compose_msi_msg(irq_data_get_msi_desc(data), &msg[1]); + msi_msg_set_addr(irq_data_get_msi_desc(data), &msg[1], + mbi_phys_base + GICD_CLRSPI_NSR); } static bool mbi_init_dev_msi_info(struct device *dev, struct irq_domain *domain, diff --git a/drivers/irqchip/irq-ls-scfg-msi.c b/drivers/irqchip/irq-ls-scfg-msi.c index c0e1aafe468c..3cb80796cc7c 100644 --- a/drivers/irqchip/irq-ls-scfg-msi.c +++ b/drivers/irqchip/irq-ls-scfg-msi.c @@ -87,8 +87,6 @@ static void ls_scfg_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) { struct ls_scfg_msi *msi_data = irq_data_get_irq_chip_data(data); - msg->address_hi = upper_32_bits(msi_data->msiir_addr); - msg->address_lo = lower_32_bits(msi_data->msiir_addr); msg->data = data->hwirq; if (msi_affinity_flag) { @@ -98,7 +96,8 @@ static void ls_scfg_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) msg->data |= cpumask_first(mask); } - iommu_dma_compose_msi_msg(irq_data_get_msi_desc(data), msg); + msi_msg_set_addr(irq_data_get_msi_desc(data), msg, + msi_data->msiir_addr); } static int ls_scfg_msi_set_affinity(struct irq_data *irq_data, diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 38c65e92ecd0..caee952febd4 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1508,7 +1508,6 @@ static inline void iommu_debugfs_setup(void) {} int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base); int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr); -void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg); #else /* CONFIG_IOMMU_DMA */ @@ -1524,11 +1523,6 @@ static inline int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_a { return 0; } - -static inline void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg) -{ -} - #endif /* CONFIG_IOMMU_DMA */ /* diff --git a/include/linux/msi.h b/include/linux/msi.h index fc4f3774c3af..8d97b890faec 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -299,6 +299,34 @@ static inline void msi_desc_set_iommu_msi_iova(struct msi_desc *desc, u64 msi_io #endif } +/** + * msi_msg_set_addr() - Set MSI address in an MSI message + * + * @desc: MSI descriptor that may carry an IOVA base address for MSI via @iommu_msi_iova/shift + * @msg: Target MSI message to set its address_hi and address_lo + * @msi_addr: Physical address to set the MSI message + * + * Notes: + * - Override @msi_addr using the IOVA base address in the @desc if @iommu_msi_shift is set + * - Otherwise, simply set @msi_addr to @msg + */ +static inline void msi_msg_set_addr(struct msi_desc *desc, struct msi_msg *msg, + phys_addr_t msi_addr) +{ +#ifdef CONFIG_IRQ_MSI_IOMMU + if (desc->iommu_msi_shift) { + u64 msi_iova = desc->iommu_msi_iova << desc->iommu_msi_shift; + + msg->address_hi = upper_32_bits(msi_iova); + msg->address_lo = lower_32_bits(msi_iova) | + (msi_addr & ((1 << desc->iommu_msi_shift) - 1)); + return; + } +#endif + msg->address_hi = upper_32_bits(msi_addr); + msg->address_lo = lower_32_bits(msi_addr); +} + int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid, struct msi_desc *init_desc); /** -- cgit v1.2.3