diff options
Diffstat (limited to 'drivers/pci')
-rw-r--r-- | drivers/pci/Kconfig | 6 | ||||
-rw-r--r-- | drivers/pci/controller/pci-hyperv.c | 17 | ||||
-rw-r--r-- | drivers/pci/controller/vmd.c | 20 | ||||
-rw-r--r-- | drivers/pci/msi/api.c | 6 | ||||
-rw-r--r-- | drivers/pci/msi/msi.c | 204 | ||||
-rw-r--r-- | drivers/pci/pci-driver.c | 6 | ||||
-rw-r--r-- | drivers/pci/pci.h | 9 | ||||
-rw-r--r-- | drivers/pci/tph.c | 44 |
8 files changed, 173 insertions, 139 deletions
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 2fbd379923fd..5c3054aaec8c 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -203,6 +203,12 @@ config PCI_P2PDMA P2P DMA transactions must be between devices behind the same root port. + Enabling this option will reduce the entropy of x86 KASLR memory + regions. For example - on a 46 bit system, the entropy goes down + from 16 bits to 15 bits. The actual reduction in entropy depends + on the physical address bits, on processor features, kernel config + (5 level page table) and physical memory present on the system. + If unsure, say N. config PCI_LABEL diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 6084b38bdda1..178da6b9fc33 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -1757,8 +1757,7 @@ static int hv_compose_multi_msi_req_get_cpu(void) spin_lock_irqsave(&multi_msi_cpu_lock, flags); - cpu_next = cpumask_next_wrap(cpu_next, cpu_online_mask, nr_cpu_ids, - false); + cpu_next = cpumask_next_wrap(cpu_next, cpu_online_mask); cpu = cpu_next; spin_unlock_irqrestore(&multi_msi_cpu_lock, flags); @@ -3976,24 +3975,18 @@ static int hv_pci_restore_msi_msg(struct pci_dev *pdev, void *arg) { struct irq_data *irq_data; struct msi_desc *entry; - int ret = 0; if (!pdev->msi_enabled && !pdev->msix_enabled) return 0; - msi_lock_descs(&pdev->dev); + guard(msi_descs_lock)(&pdev->dev); msi_for_each_desc(entry, &pdev->dev, MSI_DESC_ASSOCIATED) { irq_data = irq_get_irq_data(entry->irq); - if (WARN_ON_ONCE(!irq_data)) { - ret = -EINVAL; - break; - } - + if (WARN_ON_ONCE(!irq_data)) + return -EINVAL; hv_compose_msi_msg(irq_data, &entry->msg); } - msi_unlock_descs(&pdev->dev); - - return ret; + return 0; } /* diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index 9d9596947350..e619accca49d 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -17,6 +17,8 @@ #include <linux/rculist.h> #include <linux/rcupdate.h> +#include <xen/xen.h> + #include <asm/irqdomain.h> #define VMD_CFGBAR 0 @@ -970,6 +972,24 @@ static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id) struct vmd_dev *vmd; int err; + if (xen_domain()) { + /* + * Xen doesn't have knowledge about devices in the VMD bus + * because the config space of devices behind the VMD bridge is + * not known to Xen, and hence Xen cannot discover or configure + * them in any way. + * + * Bypass of MSI remapping won't work in that case as direct + * write by Linux to the MSI entries won't result in functional + * interrupts, as Xen is the entity that manages the host + * interrupt controller and must configure interrupts. However + * multiplexing of interrupts by the VMD bridge will work under + * Xen, so force the usage of that mode which must always be + * supported by VMD bridges. + */ + features &= ~VMD_FEAT_CAN_BYPASS_MSI_REMAP; + } + if (resource_size(&dev->resource[VMD_CFGBAR]) < (1 << 20)) return -ENOMEM; diff --git a/drivers/pci/msi/api.c b/drivers/pci/msi/api.c index b956ce591f96..d89f491afdf0 100644 --- a/drivers/pci/msi/api.c +++ b/drivers/pci/msi/api.c @@ -53,10 +53,9 @@ void pci_disable_msi(struct pci_dev *dev) if (!pci_msi_enabled() || !dev || !dev->msi_enabled) return; - msi_lock_descs(&dev->dev); + guard(msi_descs_lock)(&dev->dev); pci_msi_shutdown(dev); pci_free_msi_irqs(dev); - msi_unlock_descs(&dev->dev); } EXPORT_SYMBOL(pci_disable_msi); @@ -196,10 +195,9 @@ void pci_disable_msix(struct pci_dev *dev) if (!pci_msi_enabled() || !dev || !dev->msix_enabled) return; - msi_lock_descs(&dev->dev); + guard(msi_descs_lock)(&dev->dev); pci_msix_shutdown(dev); pci_free_msi_irqs(dev); - msi_unlock_descs(&dev->dev); } EXPORT_SYMBOL(pci_disable_msix); diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index 2f647cac4cae..d74162880d83 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -10,12 +10,12 @@ #include <linux/err.h> #include <linux/export.h> #include <linux/irq.h> +#include <linux/irqdomain.h> #include "../pci.h" #include "msi.h" int pci_msi_enable = 1; -int pci_msi_ignore_mask; /** * pci_msi_supported - check whether MSI may be enabled on a device @@ -285,6 +285,8 @@ static void pci_msi_set_enable(struct pci_dev *dev, int enable) static int msi_setup_msi_desc(struct pci_dev *dev, int nvec, struct irq_affinity_desc *masks) { + const struct irq_domain *d = dev_get_msi_domain(&dev->dev); + const struct msi_domain_info *info = d->host_data; struct msi_desc desc; u16 control; @@ -295,8 +297,7 @@ static int msi_setup_msi_desc(struct pci_dev *dev, int nvec, /* Lies, damned lies, and MSIs */ if (dev->dev_flags & PCI_DEV_FLAGS_HAS_MSI_MASKING) control |= PCI_MSI_FLAGS_MASKBIT; - /* Respect XEN's mask disabling */ - if (pci_msi_ignore_mask) + if (info->flags & MSI_FLAG_NO_MASK) control &= ~PCI_MSI_FLAGS_MASKBIT; desc.nvec_used = nvec; @@ -336,41 +337,11 @@ static int msi_verify_entries(struct pci_dev *dev) return !entry ? 0 : -EIO; } -/** - * msi_capability_init - configure device's MSI capability structure - * @dev: pointer to the pci_dev data structure of MSI device function - * @nvec: number of interrupts to allocate - * @affd: description of automatic IRQ affinity assignments (may be %NULL) - * - * Setup the MSI capability structure of the device with the requested - * number of interrupts. A return value of zero indicates the successful - * setup of an entry with the new MSI IRQ. A negative return value indicates - * an error, and a positive return value indicates the number of interrupts - * which could have been allocated. - */ -static int msi_capability_init(struct pci_dev *dev, int nvec, - struct irq_affinity *affd) +static int __msi_capability_init(struct pci_dev *dev, int nvec, struct irq_affinity_desc *masks) { - struct irq_affinity_desc *masks = NULL; + int ret = msi_setup_msi_desc(dev, nvec, masks); struct msi_desc *entry, desc; - int ret; - - /* Reject multi-MSI early on irq domain enabled architectures */ - if (nvec > 1 && !pci_msi_domain_supports(dev, MSI_FLAG_MULTI_PCI_MSI, ALLOW_LEGACY)) - return 1; - /* - * Disable MSI during setup in the hardware, but mark it enabled - * so that setup code can evaluate it. - */ - pci_msi_set_enable(dev, 0); - dev->msi_enabled = 1; - - if (affd) - masks = irq_create_affinity_masks(nvec, affd); - - msi_lock_descs(&dev->dev); - ret = msi_setup_msi_desc(dev, nvec, masks); if (ret) goto fail; @@ -399,19 +370,48 @@ static int msi_capability_init(struct pci_dev *dev, int nvec, pcibios_free_irq(dev); dev->irq = entry->irq; - goto unlock; - + return 0; err: pci_msi_unmask(&desc, msi_multi_mask(&desc)); pci_free_msi_irqs(dev); fail: dev->msi_enabled = 0; -unlock: - msi_unlock_descs(&dev->dev); - kfree(masks); return ret; } +/** + * msi_capability_init - configure device's MSI capability structure + * @dev: pointer to the pci_dev data structure of MSI device function + * @nvec: number of interrupts to allocate + * @affd: description of automatic IRQ affinity assignments (may be %NULL) + * + * Setup the MSI capability structure of the device with the requested + * number of interrupts. A return value of zero indicates the successful + * setup of an entry with the new MSI IRQ. A negative return value indicates + * an error, and a positive return value indicates the number of interrupts + * which could have been allocated. + */ +static int msi_capability_init(struct pci_dev *dev, int nvec, + struct irq_affinity *affd) +{ + /* Reject multi-MSI early on irq domain enabled architectures */ + if (nvec > 1 && !pci_msi_domain_supports(dev, MSI_FLAG_MULTI_PCI_MSI, ALLOW_LEGACY)) + return 1; + + /* + * Disable MSI during setup in the hardware, but mark it enabled + * so that setup code can evaluate it. + */ + pci_msi_set_enable(dev, 0); + dev->msi_enabled = 1; + + struct irq_affinity_desc *masks __free(kfree) = + affd ? irq_create_affinity_masks(nvec, affd) : NULL; + + guard(msi_descs_lock)(&dev->dev); + return __msi_capability_init(dev, nvec, masks); +} + int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, struct irq_affinity *affd) { @@ -604,12 +604,15 @@ static void __iomem *msix_map_region(struct pci_dev *dev, */ void msix_prepare_msi_desc(struct pci_dev *dev, struct msi_desc *desc) { + const struct irq_domain *d = dev_get_msi_domain(&dev->dev); + const struct msi_domain_info *info = d->host_data; + desc->nvec_used = 1; desc->pci.msi_attrib.is_msix = 1; desc->pci.msi_attrib.is_64 = 1; desc->pci.msi_attrib.default_irq = dev->irq; desc->pci.mask_base = dev->msix_base; - desc->pci.msi_attrib.can_mask = !pci_msi_ignore_mask && + desc->pci.msi_attrib.can_mask = !(info->flags & MSI_FLAG_NO_MASK) && !desc->pci.msi_attrib.is_virtual; if (desc->pci.msi_attrib.can_mask) { @@ -659,47 +662,45 @@ static void msix_mask_all(void __iomem *base, int tsize) u32 ctrl = PCI_MSIX_ENTRY_CTRL_MASKBIT; int i; - if (pci_msi_ignore_mask) - return; - for (i = 0; i < tsize; i++, base += PCI_MSIX_ENTRY_SIZE) writel(ctrl, base + PCI_MSIX_ENTRY_VECTOR_CTRL); } -static int msix_setup_interrupts(struct pci_dev *dev, struct msix_entry *entries, - int nvec, struct irq_affinity *affd) +static int __msix_setup_interrupts(struct pci_dev *dev, struct msix_entry *entries, + int nvec, struct irq_affinity_desc *masks) { - struct irq_affinity_desc *masks = NULL; - int ret; - - if (affd) - masks = irq_create_affinity_masks(nvec, affd); + int ret = msix_setup_msi_descs(dev, entries, nvec, masks); - msi_lock_descs(&dev->dev); - ret = msix_setup_msi_descs(dev, entries, nvec, masks); if (ret) - goto out_free; + goto fail; ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX); if (ret) - goto out_free; + goto fail; /* Check if all MSI entries honor device restrictions */ ret = msi_verify_entries(dev); if (ret) - goto out_free; + goto fail; msix_update_entries(dev, entries); - goto out_unlock; + return 0; -out_free: +fail: pci_free_msi_irqs(dev); -out_unlock: - msi_unlock_descs(&dev->dev); - kfree(masks); return ret; } +static int msix_setup_interrupts(struct pci_dev *dev, struct msix_entry *entries, + int nvec, struct irq_affinity *affd) +{ + struct irq_affinity_desc *masks __free(kfree) = + affd ? irq_create_affinity_masks(nvec, affd) : NULL; + + guard(msi_descs_lock)(&dev->dev); + return __msix_setup_interrupts(dev, entries, nvec, masks); +} + /** * msix_capability_init - configure device's MSI-X capability * @dev: pointer to the pci_dev data structure of MSI-X device function @@ -714,6 +715,8 @@ out_unlock: static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, int nvec, struct irq_affinity *affd) { + const struct irq_domain *d = dev_get_msi_domain(&dev->dev); + const struct msi_domain_info *info = d->host_data; int ret, tsize; u16 control; @@ -744,15 +747,17 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, /* Disable INTX */ pci_intx_for_msi(dev, 0); - /* - * Ensure that all table entries are masked to prevent - * stale entries from firing in a crash kernel. - * - * Done late to deal with a broken Marvell NVME device - * which takes the MSI-X mask bits into account even - * when MSI-X is disabled, which prevents MSI delivery. - */ - msix_mask_all(dev->msix_base, tsize); + if (!(info->flags & MSI_FLAG_NO_MASK)) { + /* + * Ensure that all table entries are masked to prevent + * stale entries from firing in a crash kernel. + * + * Done late to deal with a broken Marvell NVME device + * which takes the MSI-X mask bits into account even + * when MSI-X is disabled, which prevents MSI delivery. + */ + msix_mask_all(dev->msix_base, tsize); + } pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0); pcibios_free_irq(dev); @@ -871,13 +876,13 @@ void __pci_restore_msix_state(struct pci_dev *dev) write_msg = arch_restore_msi_irqs(dev); - msi_lock_descs(&dev->dev); - msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) { - if (write_msg) - __pci_write_msi_msg(entry, &entry->msg); - pci_msix_write_vector_ctrl(entry, entry->pci.msix_ctrl); + scoped_guard (msi_descs_lock, &dev->dev) { + msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) { + if (write_msg) + __pci_write_msi_msg(entry, &entry->msg); + pci_msix_write_vector_ctrl(entry, entry->pci.msix_ctrl); + } } - msi_unlock_descs(&dev->dev); pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0); } @@ -916,6 +921,53 @@ void pci_free_msi_irqs(struct pci_dev *dev) } } +#ifdef CONFIG_PCIE_TPH +/** + * pci_msix_write_tph_tag - Update the TPH tag for a given MSI-X vector + * @pdev: The PCIe device to update + * @index: The MSI-X index to update + * @tag: The tag to write + * + * Returns: 0 on success, error code on failure + */ +int pci_msix_write_tph_tag(struct pci_dev *pdev, unsigned int index, u16 tag) +{ + struct msi_desc *msi_desc; + struct irq_desc *irq_desc; + unsigned int virq; + + if (!pdev->msix_enabled) + return -ENXIO; + + guard(msi_descs_lock)(&pdev->dev); + virq = msi_get_virq(&pdev->dev, index); + if (!virq) + return -ENXIO; + /* + * This is a horrible hack, but short of implementing a PCI + * specific interrupt chip callback and a huge pile of + * infrastructure, this is the minor nuissance. It provides the + * protection against concurrent operations on this entry and keeps + * the control word cache in sync. + */ + irq_desc = irq_to_desc(virq); + if (!irq_desc) + return -ENXIO; + + guard(raw_spinlock_irq)(&irq_desc->lock); + msi_desc = irq_data_get_msi_desc(&irq_desc->irq_data); + if (!msi_desc || msi_desc->pci.msi_attrib.is_virtual) + return -ENXIO; + + msi_desc->pci.msix_ctrl &= ~PCI_MSIX_ENTRY_CTRL_ST; + msi_desc->pci.msix_ctrl |= FIELD_PREP(PCI_MSIX_ENTRY_CTRL_ST, tag); + pci_msix_write_vector_ctrl(msi_desc, msi_desc->pci.msix_ctrl); + /* Flush the write */ + readl(pci_msix_desc_addr(msi_desc)); + return 0; +} +#endif + /* Misc. infrastructure */ struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index f57ea36d125d..02726f36beb5 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -812,8 +812,7 @@ static int pci_pm_suspend(struct device *dev) * suspend callbacks can cope with runtime-suspended devices, it is * better to resume the device from runtime suspend here. */ - if (!dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND) || - pci_dev_need_resume(pci_dev)) { + if (!dev_pm_smart_suspend(dev) || pci_dev_need_resume(pci_dev)) { pm_runtime_resume(dev); pci_dev->state_saved = false; } else { @@ -1151,8 +1150,7 @@ static int pci_pm_poweroff(struct device *dev) } /* The reason to do that is the same as in pci_pm_suspend(). */ - if (!dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND) || - pci_dev_need_resume(pci_dev)) { + if (!dev_pm_smart_suspend(dev) || pci_dev_need_resume(pci_dev)) { pm_runtime_resume(dev); pci_dev->state_saved = false; } else { diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 01e51db8d285..2e9cf26a9ee9 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -989,6 +989,15 @@ int pcim_request_region_exclusive(struct pci_dev *pdev, int bar, const char *name); void pcim_release_region(struct pci_dev *pdev, int bar); +#ifdef CONFIG_PCI_MSI +int pci_msix_write_tph_tag(struct pci_dev *pdev, unsigned int index, u16 tag); +#else +static inline int pci_msix_write_tph_tag(struct pci_dev *pdev, unsigned int index, u16 tag) +{ + return -ENODEV; +} +#endif + /* * Config Address for PCI Configuration Mechanism #1 * diff --git a/drivers/pci/tph.c b/drivers/pci/tph.c index 07de59ca2ebf..77fce5e1b830 100644 --- a/drivers/pci/tph.c +++ b/drivers/pci/tph.c @@ -204,48 +204,6 @@ static u8 get_rp_completer_type(struct pci_dev *pdev) return FIELD_GET(PCI_EXP_DEVCAP2_TPH_COMP_MASK, reg); } -/* Write ST to MSI-X vector control reg - Return 0 if OK, otherwise -errno */ -static int write_tag_to_msix(struct pci_dev *pdev, int msix_idx, u16 tag) -{ -#ifdef CONFIG_PCI_MSI - struct msi_desc *msi_desc = NULL; - void __iomem *vec_ctrl; - u32 val; - int err = 0; - - msi_lock_descs(&pdev->dev); - - /* Find the msi_desc entry with matching msix_idx */ - msi_for_each_desc(msi_desc, &pdev->dev, MSI_DESC_ASSOCIATED) { - if (msi_desc->msi_index == msix_idx) - break; - } - - if (!msi_desc) { - err = -ENXIO; - goto err_out; - } - - /* Get the vector control register (offset 0xc) pointed by msix_idx */ - vec_ctrl = pdev->msix_base + msix_idx * PCI_MSIX_ENTRY_SIZE; - vec_ctrl += PCI_MSIX_ENTRY_VECTOR_CTRL; - - val = readl(vec_ctrl); - val &= ~PCI_MSIX_ENTRY_CTRL_ST; - val |= FIELD_PREP(PCI_MSIX_ENTRY_CTRL_ST, tag); - writel(val, vec_ctrl); - - /* Read back to flush the update */ - val = readl(vec_ctrl); - -err_out: - msi_unlock_descs(&pdev->dev); - return err; -#else - return -ENODEV; -#endif -} - /* Write tag to ST table - Return 0 if OK, otherwise -errno */ static int write_tag_to_st_table(struct pci_dev *pdev, int index, u16 tag) { @@ -346,7 +304,7 @@ int pcie_tph_set_st_entry(struct pci_dev *pdev, unsigned int index, u16 tag) switch (loc) { case PCI_TPH_LOC_MSIX: - err = write_tag_to_msix(pdev, index, tag); + err = pci_msix_write_tph_tag(pdev, index, tag); break; case PCI_TPH_LOC_CAP: err = write_tag_to_st_table(pdev, index, tag); |