diff options
Diffstat (limited to 'drivers/iommu')
31 files changed, 930 insertions, 738 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index ec1b5e32b972..cd750f512dee 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -154,7 +154,6 @@ config IOMMU_DMA select DMA_OPS_HELPERS select IOMMU_API select IOMMU_IOVA - select IRQ_MSI_IOMMU select NEED_SG_DMA_LENGTH select NEED_SG_DMA_FLAGS if SWIOTLB @@ -483,8 +482,7 @@ config MTK_IOMMU config MTK_IOMMU_V1 tristate "MediaTek IOMMU Version 1 (M4U gen1) Support" - depends on ARM - depends on ARCH_MEDIATEK || COMPILE_TEST + depends on (ARCH_MEDIATEK && ARM) || COMPILE_TEST select ARM_DMA_USE_IOMMU select IOMMU_API select MEMORY diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 68debf5ee2d7..220c598b7e14 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -47,7 +47,6 @@ extern unsigned long amd_iommu_pgsize_bitmap; /* Protection domain ops */ void amd_iommu_init_identity_domain(void); struct protection_domain *protection_domain_alloc(void); -void protection_domain_free(struct protection_domain *domain); struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev, struct mm_struct *mm); void amd_iommu_domain_free(struct iommu_domain *dom); @@ -176,12 +175,11 @@ void amd_iommu_apply_ivrs_quirks(void); #else static inline void amd_iommu_apply_ivrs_quirks(void) { } #endif +struct dev_table_entry *amd_iommu_get_ivhd_dte_flags(u16 segid, u16 devid); void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode); struct dev_table_entry *get_dev_table(struct amd_iommu *iommu); - -#endif - -struct dev_table_entry *amd_iommu_get_ivhd_dte_flags(u16 segid, u16 devid); struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid); + +#endif /* AMD_IOMMU_H */ diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 23caea22f8dc..5089b58e528a 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -112,6 +112,10 @@ #define FEATURE_SNPAVICSUP_GAM(x) \ (FIELD_GET(FEATURE_SNPAVICSUP, x) == 0x1) +#define FEATURE_NUM_INT_REMAP_SUP GENMASK_ULL(9, 8) +#define FEATURE_NUM_INT_REMAP_SUP_2K(x) \ + (FIELD_GET(FEATURE_NUM_INT_REMAP_SUP, x) == 0x1) + /* Note: * The current driver only support 16-bit PASID. * Currently, hardware only implement upto 16-bit PASID @@ -175,13 +179,16 @@ #define CONTROL_GAM_EN 25 #define CONTROL_GALOG_EN 28 #define CONTROL_GAINT_EN 29 +#define CONTROL_NUM_INT_REMAP_MODE 43 +#define CONTROL_NUM_INT_REMAP_MODE_MASK 0x03 +#define CONTROL_NUM_INT_REMAP_MODE_2K 0x01 #define CONTROL_EPH_EN 45 #define CONTROL_XT_EN 50 #define CONTROL_INTCAPXT_EN 51 #define CONTROL_IRTCACHEDIS 59 #define CONTROL_SNPAVIC_EN 61 -#define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT) +#define CTRL_INV_TO_MASK 7 #define CTRL_INV_TO_NONE 0 #define CTRL_INV_TO_1MS 1 #define CTRL_INV_TO_10MS 2 @@ -309,15 +316,13 @@ #define DTE_IRQ_REMAP_INTCTL (2ULL << 60) #define DTE_IRQ_REMAP_ENABLE 1ULL -/* - * AMD IOMMU hardware only support 512 IRTEs despite - * the architectural limitation of 2048 entries. - */ -#define DTE_INTTAB_ALIGNMENT 128 -#define DTE_INTTABLEN_VALUE 9ULL -#define DTE_INTTABLEN (DTE_INTTABLEN_VALUE << 1) #define DTE_INTTABLEN_MASK (0xfULL << 1) -#define MAX_IRQS_PER_TABLE (1 << DTE_INTTABLEN_VALUE) +#define DTE_INTTABLEN_VALUE_512 9ULL +#define DTE_INTTABLEN_512 (DTE_INTTABLEN_VALUE_512 << 1) +#define MAX_IRQS_PER_TABLE_512 BIT(DTE_INTTABLEN_VALUE_512) +#define DTE_INTTABLEN_VALUE_2K 11ULL +#define DTE_INTTABLEN_2K (DTE_INTTABLEN_VALUE_2K << 1) +#define MAX_IRQS_PER_TABLE_2K BIT(DTE_INTTABLEN_VALUE_2K) #define PAGE_MODE_NONE 0x00 #define PAGE_MODE_1_LEVEL 0x01 @@ -492,9 +497,6 @@ extern const struct iommu_ops amd_iommu_ops; /* IVRS indicates that pre-boot remapping was enabled */ extern bool amdr_ivrs_remap_support; -/* kmem_cache to get tables with 128 byte alignement */ -extern struct kmem_cache *amd_iommu_irq_cache; - #define PCI_SBDF_TO_SEGID(sbdf) (((sbdf) >> 16) & 0xffff) #define PCI_SBDF_TO_DEVID(sbdf) ((sbdf) & 0xffff) #define PCI_SEG_DEVID_TO_SBDF(seg, devid) ((((u32)(seg) & 0xffff) << 16) | \ @@ -851,6 +853,7 @@ struct iommu_dev_data { struct device *dev; u16 devid; /* PCI Device ID */ + unsigned int max_irqs; /* Maximum IRQs supported by device */ u32 max_pasids; /* Max supported PASIDs */ u32 flags; /* Holds AMD_IOMMU_DEVICE_FLAG_<*> */ int ats_qdep; @@ -928,9 +931,6 @@ struct unity_map_entry { * Data structures for device handling */ -/* size of the dma_ops aperture as power of 2 */ -extern unsigned amd_iommu_aperture_order; - extern bool amd_iommu_force_isolation; /* Max levels of glxval supported */ diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index cb536d372b12..dd9e26b7b718 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -12,7 +12,6 @@ #include <linux/acpi.h> #include <linux/list.h> #include <linux/bitmap.h> -#include <linux/slab.h> #include <linux/syscore_ops.h> #include <linux/interrupt.h> #include <linux/msi.h> @@ -219,7 +218,6 @@ static bool __initdata cmdline_maps; static enum iommu_init_state init_state = IOMMU_START_STATE; static int amd_iommu_enable_interrupts(void); -static int __init iommu_go_to_state(enum iommu_init_state state); static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg); static bool amd_iommu_pre_enabled = true; @@ -412,33 +410,26 @@ static void iommu_set_device_table(struct amd_iommu *iommu) &entry, sizeof(entry)); } -/* Generic functions to enable/disable certain features of the IOMMU. */ -void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) +static void iommu_feature_set(struct amd_iommu *iommu, u64 val, u64 mask, u8 shift) { u64 ctrl; ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); - ctrl |= (1ULL << bit); + mask <<= shift; + ctrl &= ~mask; + ctrl |= (val << shift) & mask; writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); } -static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) +/* Generic functions to enable/disable certain features of the IOMMU. */ +void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) { - u64 ctrl; - - ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); - ctrl &= ~(1ULL << bit); - writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); + iommu_feature_set(iommu, 1ULL, 1ULL, bit); } -static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout) +static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit) { - u64 ctrl; - - ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); - ctrl &= ~CTRL_INV_TO_MASK; - ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK; - writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); + iommu_feature_set(iommu, 0ULL, 1ULL, bit); } /* Function to enable the hardware */ @@ -1069,7 +1060,8 @@ static bool __copy_device_table(struct amd_iommu *iommu) int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK; if (irq_v && (int_ctl || int_tab_len)) { if ((int_ctl != DTE_IRQ_REMAP_INTCTL) || - (int_tab_len != DTE_INTTABLEN)) { + (int_tab_len != DTE_INTTABLEN_512 && + int_tab_len != DTE_INTTABLEN_2K)) { pr_err("Wrong old irq remapping flag: %#x\n", devid); memunmap(old_devtb); return false; @@ -2652,7 +2644,7 @@ static void iommu_init_flags(struct amd_iommu *iommu) iommu_feature_enable(iommu, CONTROL_COHERENT_EN); /* Set IOTLB invalidation timeout to 1s */ - iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S); + iommu_feature_set(iommu, CTRL_INV_TO_1S, CTRL_INV_TO_MASK, CONTROL_INV_TIMEOUT); /* Enable Enhanced Peripheral Page Request Handling */ if (check_feature(FEATURE_EPHSUP)) @@ -2745,6 +2737,17 @@ static void iommu_enable_irtcachedis(struct amd_iommu *iommu) iommu->irtcachedis_enabled ? "disabled" : "enabled"); } +static void iommu_enable_2k_int(struct amd_iommu *iommu) +{ + if (!FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2)) + return; + + iommu_feature_set(iommu, + CONTROL_NUM_INT_REMAP_MODE_2K, + CONTROL_NUM_INT_REMAP_MODE_MASK, + CONTROL_NUM_INT_REMAP_MODE); +} + static void early_enable_iommu(struct amd_iommu *iommu) { iommu_disable(iommu); @@ -2757,6 +2760,7 @@ static void early_enable_iommu(struct amd_iommu *iommu) iommu_enable_ga(iommu); iommu_enable_xt(iommu); iommu_enable_irtcachedis(iommu); + iommu_enable_2k_int(iommu); iommu_enable(iommu); amd_iommu_flush_all_caches(iommu); } @@ -2813,6 +2817,7 @@ static void early_enable_iommus(void) iommu_enable_ga(iommu); iommu_enable_xt(iommu); iommu_enable_irtcachedis(iommu); + iommu_enable_2k_int(iommu); iommu_set_device_table(iommu); amd_iommu_flush_all_caches(iommu); } @@ -2939,9 +2944,6 @@ static struct syscore_ops amd_iommu_syscore_ops = { static void __init free_iommu_resources(void) { - kmem_cache_destroy(amd_iommu_irq_cache); - amd_iommu_irq_cache = NULL; - free_iommu_all(); free_pci_segments(); } @@ -3040,7 +3042,7 @@ static void __init ivinfo_init(void *ivrs) static int __init early_amd_iommu_init(void) { struct acpi_table_header *ivrs_base; - int remap_cache_sz, ret; + int ret; acpi_status status; if (!amd_iommu_detected) @@ -3102,22 +3104,7 @@ static int __init early_amd_iommu_init(void) if (amd_iommu_irq_remap) { struct amd_iommu_pci_seg *pci_seg; - /* - * Interrupt remapping enabled, create kmem_cache for the - * remapping tables. - */ ret = -ENOMEM; - if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) - remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32); - else - remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2); - amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache", - remap_cache_sz, - DTE_INTTAB_ALIGNMENT, - 0, NULL); - if (!amd_iommu_irq_cache) - goto out; - for_each_pci_segment(pci_seg) { if (alloc_irq_lookup_table(pci_seg)) goto out; diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index f3399087859f..26cf562dde11 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -47,13 +47,6 @@ static u64 *first_pte_l7(u64 *pte, unsigned long *page_size, return fpte; } -/**************************************************************************** - * - * The functions below are used the create the page table mappings for - * unity mapped regions. - * - ****************************************************************************/ - static void free_pt_page(u64 *pt, struct list_head *freelist) { struct page *p = virt_to_page(pt); diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index c616de2c5926..a56a27396305 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -254,7 +254,7 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova, pte = v2_alloc_pte(cfg->amd.nid, pgtable->pgd, iova, map_size, gfp, &updated); if (!pte) { - ret = -EINVAL; + ret = -ENOMEM; goto out; } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index cd5116d8c3b2..be8761bbef0f 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -75,8 +75,6 @@ struct iommu_cmd { */ DEFINE_IDA(pdom_ids); -struct kmem_cache *amd_iommu_irq_cache; - static int amd_iommu_attach_device(struct iommu_domain *dom, struct device *dev); @@ -868,7 +866,7 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) int type, devid, flags, tag; volatile u32 *event = __evt; int count = 0; - u64 address; + u64 address, ctrl; u32 pasid; retry: @@ -878,6 +876,7 @@ retry: (event[1] & EVENT_DOMID_MASK_LO); flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; address = (u64)(((u64)event[3]) << 32) | event[2]; + ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET); if (type == 0) { /* Did we hit the erratum? */ @@ -899,6 +898,7 @@ retry: dev_err(dev, "Event logged [ILLEGAL_DEV_TABLE_ENTRY device=%04x:%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n", iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags); + dev_err(dev, "Control Reg : 0x%llx\n", ctrl); dump_dte_entry(iommu, devid); break; case EVENT_TYPE_DEV_TAB_ERR: @@ -2394,8 +2394,14 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev) } out_err: + iommu_completion_wait(iommu); + if (FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2)) + dev_data->max_irqs = MAX_IRQS_PER_TABLE_2K; + else + dev_data->max_irqs = MAX_IRQS_PER_TABLE_512; + if (dev_is_pci(dev)) pci_prepare_ats(to_pci_dev(dev), PAGE_SHIFT); @@ -2432,15 +2438,6 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev) * *****************************************************************************/ -void protection_domain_free(struct protection_domain *domain) -{ - WARN_ON(!list_empty(&domain->dev_list)); - if (domain->domain.type & __IOMMU_DOMAIN_PAGING) - free_io_pgtable_ops(&domain->iop.pgtbl.ops); - pdom_id_free(domain->id); - kfree(domain); -} - static void protection_domain_init(struct protection_domain *domain) { spin_lock_init(&domain->lock); @@ -2578,7 +2575,11 @@ void amd_iommu_domain_free(struct iommu_domain *dom) { struct protection_domain *domain = to_pdomain(dom); - protection_domain_free(domain); + WARN_ON(!list_empty(&domain->dev_list)); + if (domain->domain.type & __IOMMU_DOMAIN_PAGING) + free_io_pgtable_ops(&domain->iop.pgtbl.ops); + pdom_id_free(domain->id); + kfree(domain); } static int blocked_domain_attach_device(struct iommu_domain *domain, @@ -3081,6 +3082,13 @@ out: raw_spin_unlock_irqrestore(&iommu->lock, flags); } +static inline u8 iommu_get_int_tablen(struct iommu_dev_data *dev_data) +{ + if (dev_data && dev_data->max_irqs == MAX_IRQS_PER_TABLE_2K) + return DTE_INTTABLEN_2K; + return DTE_INTTABLEN_512; +} + static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid, struct irq_remap_table *table) { @@ -3095,7 +3103,7 @@ static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid, new &= ~DTE_IRQ_PHYS_ADDR_MASK; new |= iommu_virt_to_phys(table->table); new |= DTE_IRQ_REMAP_INTCTL; - new |= DTE_INTTABLEN; + new |= iommu_get_int_tablen(dev_data); new |= DTE_IRQ_REMAP_ENABLE; WRITE_ONCE(dte->data[2], new); @@ -3121,7 +3129,7 @@ static struct irq_remap_table *get_irq_table(struct amd_iommu *iommu, u16 devid) return table; } -static struct irq_remap_table *__alloc_irq_table(void) +static struct irq_remap_table *__alloc_irq_table(int nid, int order) { struct irq_remap_table *table; @@ -3129,19 +3137,13 @@ static struct irq_remap_table *__alloc_irq_table(void) if (!table) return NULL; - table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_KERNEL); + table->table = iommu_alloc_pages_node(nid, GFP_KERNEL, order); if (!table->table) { kfree(table); return NULL; } raw_spin_lock_init(&table->lock); - if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) - memset(table->table, 0, - MAX_IRQS_PER_TABLE * sizeof(u32)); - else - memset(table->table, 0, - (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2))); return table; } @@ -3173,13 +3175,24 @@ static int set_remap_table_entry_alias(struct pci_dev *pdev, u16 alias, return 0; } +static inline size_t get_irq_table_size(unsigned int max_irqs) +{ + if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) + return max_irqs * sizeof(u32); + + return max_irqs * (sizeof(u64) * 2); +} + static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu, - u16 devid, struct pci_dev *pdev) + u16 devid, struct pci_dev *pdev, + unsigned int max_irqs) { struct irq_remap_table *table = NULL; struct irq_remap_table *new_table = NULL; struct amd_iommu_pci_seg *pci_seg; unsigned long flags; + int order = get_order(get_irq_table_size(max_irqs)); + int nid = iommu && iommu->dev ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE; u16 alias; spin_lock_irqsave(&iommu_table_lock, flags); @@ -3198,7 +3211,7 @@ static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu, spin_unlock_irqrestore(&iommu_table_lock, flags); /* Nothing there yet, allocate new irq remapping table */ - new_table = __alloc_irq_table(); + new_table = __alloc_irq_table(nid, order); if (!new_table) return NULL; @@ -3233,20 +3246,21 @@ out_unlock: spin_unlock_irqrestore(&iommu_table_lock, flags); if (new_table) { - kmem_cache_free(amd_iommu_irq_cache, new_table->table); + iommu_free_pages(new_table->table, order); kfree(new_table); } return table; } static int alloc_irq_index(struct amd_iommu *iommu, u16 devid, int count, - bool align, struct pci_dev *pdev) + bool align, struct pci_dev *pdev, + unsigned long max_irqs) { struct irq_remap_table *table; int index, c, alignment = 1; unsigned long flags; - table = alloc_irq_table(iommu, devid, pdev); + table = alloc_irq_table(iommu, devid, pdev, max_irqs); if (!table) return -ENODEV; @@ -3257,7 +3271,7 @@ static int alloc_irq_index(struct amd_iommu *iommu, u16 devid, int count, /* Scan table for free entries */ for (index = ALIGN(table->min_index, alignment), c = 0; - index < MAX_IRQS_PER_TABLE;) { + index < max_irqs;) { if (!iommu->irte_ops->is_allocated(table, index)) { c += 1; } else { @@ -3527,6 +3541,14 @@ static void fill_msi_msg(struct msi_msg *msg, u32 index) msg->data = index; msg->address_lo = 0; msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW; + /* + * The struct msi_msg.dest_mode_logical is used to set the DM bit + * in MSI Message Address Register. For device w/ 2K int-remap support, + * this is bit must be set to 1 regardless of the actual destination + * mode, which is signified by the IRTE[DM]. + */ + if (FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2)) + msg->arch_addr_lo.dest_mode_logical = true; msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH; } @@ -3589,6 +3611,8 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, struct amd_ir_data *data = NULL; struct amd_iommu *iommu; struct irq_cfg *cfg; + struct iommu_dev_data *dev_data; + unsigned long max_irqs; int i, ret, devid, seg, sbdf; int index; @@ -3607,6 +3631,9 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, if (!iommu) return -EINVAL; + dev_data = search_dev_data(iommu, devid); + max_irqs = dev_data ? dev_data->max_irqs : MAX_IRQS_PER_TABLE_512; + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); if (ret < 0) return ret; @@ -3614,7 +3641,7 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) { struct irq_remap_table *table; - table = alloc_irq_table(iommu, devid, NULL); + table = alloc_irq_table(iommu, devid, NULL, max_irqs); if (table) { if (!table->min_index) { /* @@ -3635,9 +3662,11 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, bool align = (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI); index = alloc_irq_index(iommu, devid, nr_irqs, align, - msi_desc_to_pci_dev(info->desc)); + msi_desc_to_pci_dev(info->desc), + max_irqs); } else { - index = alloc_irq_index(iommu, devid, nr_irqs, false, NULL); + index = alloc_irq_index(iommu, devid, nr_irqs, false, NULL, + max_irqs); } if (index < 0) { diff --git a/drivers/iommu/amd/pasid.c b/drivers/iommu/amd/pasid.c index 11150cfd6718..77c8e9a91cbc 100644 --- a/drivers/iommu/amd/pasid.c +++ b/drivers/iommu/amd/pasid.c @@ -195,7 +195,7 @@ struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev, ret = mmu_notifier_register(&pdom->mn, mm); if (ret) { - protection_domain_free(pdom); + amd_iommu_domain_free(&pdom->domain); return ERR_PTR(ret); } diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 95ba3caeb401..e13501541fdd 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -36,7 +36,7 @@ #define DART_MAX_STREAMS 256 #define DART_MAX_TTBR 4 -#define MAX_DARTS_PER_DEVICE 2 +#define MAX_DARTS_PER_DEVICE 3 /* Common registers */ @@ -277,6 +277,9 @@ struct apple_dart_domain { * @streams: streams for this device */ struct apple_dart_master_cfg { + /* Intersection of DART capabilitles */ + u32 supports_bypass : 1; + struct apple_dart_stream_map stream_maps[MAX_DARTS_PER_DEVICE]; }; @@ -684,7 +687,7 @@ static int apple_dart_attach_dev_identity(struct iommu_domain *domain, struct apple_dart_stream_map *stream_map; int i; - if (!cfg->stream_maps[0].dart->supports_bypass) + if (!cfg->supports_bypass) return -EINVAL; for_each_stream_map(i, cfg, stream_map) @@ -792,20 +795,23 @@ static int apple_dart_of_xlate(struct device *dev, return -EINVAL; sid = args->args[0]; - if (!cfg) + if (!cfg) { cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); - if (!cfg) - return -ENOMEM; + if (!cfg) + return -ENOMEM; + /* Will be ANDed with DART capabilities */ + cfg->supports_bypass = true; + } dev_iommu_priv_set(dev, cfg); cfg_dart = cfg->stream_maps[0].dart; if (cfg_dart) { - if (cfg_dart->supports_bypass != dart->supports_bypass) - return -EINVAL; if (cfg_dart->pgsize != dart->pgsize) return -EINVAL; } + cfg->supports_bypass &= dart->supports_bypass; + for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) { if (cfg->stream_maps[i].dart == dart) { set_bit(sid, cfg->stream_maps[i].sidmap); @@ -945,7 +951,7 @@ static int apple_dart_def_domain_type(struct device *dev) if (cfg->stream_maps[0].dart->pgsize > PAGE_SIZE) return IOMMU_DOMAIN_IDENTITY; - if (!cfg->stream_maps[0].dart->supports_bypass) + if (!cfg->supports_bypass) return IOMMU_DOMAIN_DMA; return 0; diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index de205a34ffc6..8f439c265a23 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -79,8 +79,11 @@ static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu) static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu) { - if (pm_runtime_enabled(smmu->dev)) - pm_runtime_put_autosuspend(smmu->dev); + if (pm_runtime_enabled(smmu->dev)) { + pm_runtime_mark_last_busy(smmu->dev); + __pm_runtime_put_autosuspend(smmu->dev); + + } } static void arm_smmu_rpm_use_autosuspend(struct arm_smmu_device *smmu) @@ -1195,7 +1198,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) /* Looks ok, so add the device to the domain */ arm_smmu_master_install_s2crs(cfg, S2CR_TYPE_TRANS, smmu_domain->cfg.cbndx, fwspec); - arm_smmu_rpm_use_autosuspend(smmu); rpm_put: arm_smmu_rpm_put(smmu); return ret; @@ -1218,7 +1220,6 @@ static int arm_smmu_attach_dev_type(struct device *dev, return ret; arm_smmu_master_install_s2crs(cfg, type, 0, fwspec); - arm_smmu_rpm_use_autosuspend(smmu); arm_smmu_rpm_put(smmu); return 0; } @@ -1486,7 +1487,6 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev) out_cfg_free: kfree(cfg); out_free: - iommu_fwspec_free(dev); return ERR_PTR(ret); } @@ -2246,6 +2246,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev) if (dev->pm_domain) { pm_runtime_set_active(dev); pm_runtime_enable(dev); + arm_smmu_rpm_use_autosuspend(smmu); } return 0; diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 2a9fa0c8cc00..0832998eca38 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -24,6 +24,7 @@ #include <linux/memremap.h> #include <linux/mm.h> #include <linux/mutex.h> +#include <linux/msi.h> #include <linux/of_iommu.h> #include <linux/pci.h> #include <linux/scatterlist.h> @@ -86,7 +87,6 @@ struct iommu_dma_cookie { struct iommu_domain *fq_domain; /* Options for dma-iommu use */ struct iommu_dma_options options; - struct mutex mutex; }; static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled); @@ -102,6 +102,9 @@ static int __init iommu_dma_forcedac_setup(char *str) } early_param("iommu.forcedac", iommu_dma_forcedac_setup); +static int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, + phys_addr_t msi_addr); + /* Number of entries per flush queue */ #define IOVA_DEFAULT_FQ_SIZE 256 #define IOVA_SINGLE_FQ_SIZE 32768 @@ -397,7 +400,7 @@ int iommu_get_dma_cookie(struct iommu_domain *domain) if (!domain->iova_cookie) return -ENOMEM; - mutex_init(&domain->iova_cookie->mutex); + iommu_domain_set_sw_msi(domain, iommu_dma_sw_msi); return 0; } @@ -429,6 +432,7 @@ int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) cookie->msi_iova = base; domain->iova_cookie = cookie; + iommu_domain_set_sw_msi(domain, iommu_dma_sw_msi); return 0; } EXPORT_SYMBOL(iommu_get_msi_cookie); @@ -443,6 +447,11 @@ void iommu_put_dma_cookie(struct iommu_domain *domain) struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iommu_dma_msi_page *msi, *tmp; +#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) + if (domain->sw_msi != iommu_dma_sw_msi) + return; +#endif + if (!cookie) return; @@ -698,23 +707,20 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, struct device *dev domain->geometry.aperture_start >> order); /* start_pfn is always nonzero for an already-initialised domain */ - mutex_lock(&cookie->mutex); if (iovad->start_pfn) { if (1UL << order != iovad->granule || base_pfn != iovad->start_pfn) { pr_warn("Incompatible range for DMA domain\n"); - ret = -EFAULT; - goto done_unlock; + return -EFAULT; } - ret = 0; - goto done_unlock; + return 0; } init_iova_domain(iovad, 1UL << order, base_pfn); ret = iova_domain_init_rcaches(iovad); if (ret) - goto done_unlock; + return ret; iommu_dma_init_options(&cookie->options, dev); @@ -723,11 +729,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, struct device *dev (!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) || iommu_dma_init_fq(domain))) domain->type = IOMMU_DOMAIN_DMA; - ret = iova_reserve_iommu_regions(dev, domain); - -done_unlock: - mutex_unlock(&cookie->mutex); - return ret; + return iova_reserve_iommu_regions(dev, domain); } /** @@ -1800,60 +1802,26 @@ out_free_page: return NULL; } -/** - * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain - * @desc: MSI descriptor, will store the MSI page - * @msi_addr: MSI target address to be mapped - * - * Return: 0 on success or negative error code if the mapping failed. - */ -int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) +static int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, + phys_addr_t msi_addr) { struct device *dev = msi_desc_to_dev(desc); - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct iommu_dma_msi_page *msi_page; - static DEFINE_MUTEX(msi_prepare_lock); /* see below */ + const struct iommu_dma_msi_page *msi_page; - if (!domain || !domain->iova_cookie) { - desc->iommu_cookie = NULL; + if (!domain->iova_cookie) { + msi_desc_set_iommu_msi_iova(desc, 0, 0); return 0; } - /* - * In fact the whole prepare operation should already be serialised by - * irq_domain_mutex further up the callchain, but that's pretty subtle - * on its own, so consider this locking as failsafe documentation... - */ - mutex_lock(&msi_prepare_lock); + iommu_group_mutex_assert(dev); msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain); - mutex_unlock(&msi_prepare_lock); - - msi_desc_set_iommu_cookie(desc, msi_page); - if (!msi_page) return -ENOMEM; - return 0; -} -/** - * iommu_dma_compose_msi_msg() - Apply translation to an MSI message - * @desc: MSI descriptor prepared by iommu_dma_prepare_msi() - * @msg: MSI message containing target physical address - */ -void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg) -{ - struct device *dev = msi_desc_to_dev(desc); - const struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - const struct iommu_dma_msi_page *msi_page; - - msi_page = msi_desc_get_iommu_cookie(desc); - - if (!domain || !domain->iova_cookie || WARN_ON(!msi_page)) - return; - - msg->address_hi = upper_32_bits(msi_page->iova); - msg->address_lo &= cookie_msi_granule(domain->iova_cookie) - 1; - msg->address_lo += lower_32_bits(msi_page->iova); + msi_desc_set_iommu_msi_iova( + desc, msi_page->iova, + ilog2(cookie_msi_granule(domain->iova_cookie))); + return 0; } static int iommu_dma_init(void) diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c index 2a86aa5d54c6..761ab647f372 100644 --- a/drivers/iommu/hyperv-iommu.c +++ b/drivers/iommu/hyperv-iommu.c @@ -130,7 +130,7 @@ static int __init hyperv_prepare_irq_remapping(void) x86_init.hyper.msi_ext_dest_id()) return -ENODEV; - if (hv_root_partition) { + if (hv_root_partition()) { name = "HYPERV-ROOT-IR"; ops = &hyperv_root_ir_domain_ops; } else { @@ -151,7 +151,7 @@ static int __init hyperv_prepare_irq_remapping(void) return -ENOMEM; } - if (hv_root_partition) + if (hv_root_partition()) return 0; /* The rest is only relevant to guests */ /* @@ -217,7 +217,7 @@ hyperv_root_ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg) status = hv_unmap_ioapic_interrupt(ioapic_id, &entry); if (status != HV_STATUS_SUCCESS) - pr_debug("%s: unexpected unmap status %lld\n", __func__, status); + hv_status_debug(status, "failed to unmap\n"); data->entry.ioapic_rte.as_uint64 = 0; data->entry.source = 0; /* Invalid source */ @@ -228,7 +228,7 @@ hyperv_root_ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg) vector, &entry); if (status != HV_STATUS_SUCCESS) { - pr_err("%s: map hypercall failed, status %lld\n", __func__, status); + hv_status_err(status, "map failed\n"); return; } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index bf1f0c814348..ec2f385ae25b 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -737,7 +737,8 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, return NULL; domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); - pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; + pteval = virt_to_phys(tmp_page) | DMA_PTE_READ | + DMA_PTE_WRITE; if (domain->use_first_level) pteval |= DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; @@ -1172,32 +1173,59 @@ static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev) return true; } -static void iommu_enable_pci_caps(struct device_domain_info *info) +static void iommu_enable_pci_ats(struct device_domain_info *info) { struct pci_dev *pdev; - if (!dev_is_pci(info->dev)) + if (!info->ats_supported) return; pdev = to_pci_dev(info->dev); - if (info->ats_supported && pci_ats_page_aligned(pdev) && - !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) + if (!pci_ats_page_aligned(pdev)) + return; + + if (!pci_enable_ats(pdev, VTD_PAGE_SHIFT)) info->ats_enabled = 1; } -static void iommu_disable_pci_caps(struct device_domain_info *info) +static void iommu_disable_pci_ats(struct device_domain_info *info) +{ + if (!info->ats_enabled) + return; + + pci_disable_ats(to_pci_dev(info->dev)); + info->ats_enabled = 0; +} + +static void iommu_enable_pci_pri(struct device_domain_info *info) { struct pci_dev *pdev; - if (!dev_is_pci(info->dev)) + if (!info->ats_enabled || !info->pri_supported) return; pdev = to_pci_dev(info->dev); + /* PASID is required in PRG Response Message. */ + if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev)) + return; - if (info->ats_enabled) { - pci_disable_ats(pdev); - info->ats_enabled = 0; - } + if (pci_reset_pri(pdev)) + return; + + if (!pci_enable_pri(pdev, PRQ_DEPTH)) + info->pri_enabled = 1; +} + +static void iommu_disable_pci_pri(struct device_domain_info *info) +{ + if (!info->pri_enabled) + return; + + if (WARN_ON(info->iopf_refcount)) + iopf_queue_remove_device(info->iommu->iopf_queue, info->dev); + + pci_disable_pri(to_pci_dev(info->dev)); + info->pri_enabled = 0; } static void intel_flush_iotlb_all(struct iommu_domain *domain) @@ -1556,12 +1584,19 @@ domain_context_mapping(struct dmar_domain *domain, struct device *dev) struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; u8 bus = info->bus, devfn = info->devfn; + int ret; if (!dev_is_pci(dev)) return domain_context_mapping_one(domain, iommu, bus, devfn); - return pci_for_each_dma_alias(to_pci_dev(dev), - domain_context_mapping_cb, domain); + ret = pci_for_each_dma_alias(to_pci_dev(dev), + domain_context_mapping_cb, domain); + if (ret) + return ret; + + iommu_enable_pci_ats(info); + + return 0; } /* Return largest possible superpage level for a given mapping */ @@ -1748,7 +1783,7 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 context_clear_entry(context); __iommu_flush_cache(iommu, context, sizeof(*context)); spin_unlock(&iommu->lock); - intel_context_flush_present(info, context, did, true); + intel_context_flush_no_pasid(info, context, did); } int __domain_setup_first_level(struct intel_iommu *iommu, @@ -1843,8 +1878,6 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, if (ret) goto out_block_translation; - iommu_enable_pci_caps(info); - ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID); if (ret) goto out_block_translation; @@ -2871,16 +2904,19 @@ void intel_iommu_shutdown(void) if (no_iommu || dmar_disabled) return; - down_write(&dmar_global_lock); + /* + * All other CPUs were brought down, hotplug interrupts were disabled, + * no lock and RCU checking needed anymore + */ + list_for_each_entry(drhd, &dmar_drhd_units, list) { + iommu = drhd->iommu; - /* Disable PMRs explicitly here. */ - for_each_iommu(iommu, drhd) + /* Disable PMRs explicitly here. */ iommu_disable_protect_mem_regions(iommu); - /* Make sure the IOMMUs are switched off */ - intel_disable_iommus(); - - up_write(&dmar_global_lock); + /* Make sure the IOMMUs are switched off */ + iommu_disable_translation(iommu); + } } static struct intel_iommu *dev_to_intel_iommu(struct device *dev) @@ -3013,6 +3049,7 @@ static int __init probe_acpi_namespace_devices(void) if (dev->bus != &acpi_bus_type) continue; + up_read(&dmar_global_lock); adev = to_acpi_device(dev); mutex_lock(&adev->physical_node_lock); list_for_each_entry(pn, @@ -3022,6 +3059,7 @@ static int __init probe_acpi_namespace_devices(void) break; } mutex_unlock(&adev->physical_node_lock); + down_read(&dmar_global_lock); if (ret) return ret; @@ -3205,6 +3243,7 @@ static void domain_context_clear(struct device_domain_info *info) pci_for_each_dma_alias(to_pci_dev(info->dev), &domain_context_clear_one_cb, info); + iommu_disable_pci_ats(info); } /* @@ -3221,7 +3260,6 @@ void device_block_translation(struct device *dev) if (info->domain) cache_tag_unassign_domain(info->domain, dev, IOMMU_NO_PASID); - iommu_disable_pci_caps(info); if (!dev_is_real_dma_subdevice(dev)) { if (sm_supported(iommu)) intel_pasid_tear_down_entry(iommu, dev, @@ -3756,6 +3794,10 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) !pci_enable_pasid(pdev, info->pasid_supported & ~1)) info->pasid_enabled = 1; + if (sm_supported(iommu)) + iommu_enable_pci_ats(info); + iommu_enable_pci_pri(info); + return &iommu->iommu; free_table: intel_pasid_free_table(dev); @@ -3772,6 +3814,9 @@ static void intel_iommu_release_device(struct device *dev) struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; + iommu_disable_pci_pri(info); + iommu_disable_pci_ats(info); + if (info->pasid_enabled) { pci_disable_pasid(to_pci_dev(dev)); info->pasid_enabled = 0; @@ -3858,151 +3903,41 @@ static struct iommu_group *intel_iommu_device_group(struct device *dev) return generic_device_group(dev); } -static int intel_iommu_enable_sva(struct device *dev) +int intel_iommu_enable_iopf(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); - struct intel_iommu *iommu; - - if (!info || dmar_disabled) - return -EINVAL; - - iommu = info->iommu; - if (!iommu) - return -EINVAL; - - if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE)) - return -ENODEV; - - if (!info->pasid_enabled || !info->ats_enabled) - return -EINVAL; - - /* - * Devices having device-specific I/O fault handling should not - * support PCI/PRI. The IOMMU side has no means to check the - * capability of device-specific IOPF. Therefore, IOMMU can only - * default that if the device driver enables SVA on a non-PRI - * device, it will handle IOPF in its own way. - */ - if (!info->pri_supported) - return 0; - - /* Devices supporting PRI should have it enabled. */ - if (!info->pri_enabled) - return -EINVAL; - - return 0; -} - -static int context_flip_pri(struct device_domain_info *info, bool enable) -{ struct intel_iommu *iommu = info->iommu; - u8 bus = info->bus, devfn = info->devfn; - struct context_entry *context; - u16 did; - - spin_lock(&iommu->lock); - if (context_copied(iommu, bus, devfn)) { - spin_unlock(&iommu->lock); - return -EINVAL; - } - - context = iommu_context_addr(iommu, bus, devfn, false); - if (!context || !context_present(context)) { - spin_unlock(&iommu->lock); - return -ENODEV; - } - did = context_domain_id(context); - - if (enable) - context_set_sm_pre(context); - else - context_clear_sm_pre(context); - - if (!ecap_coherent(iommu->ecap)) - clflush_cache_range(context, sizeof(*context)); - intel_context_flush_present(info, context, did, true); - spin_unlock(&iommu->lock); - - return 0; -} - -static int intel_iommu_enable_iopf(struct device *dev) -{ - struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL; - struct device_domain_info *info = dev_iommu_priv_get(dev); - struct intel_iommu *iommu; int ret; - if (!pdev || !info || !info->ats_enabled || !info->pri_supported) + if (!info->pri_enabled) return -ENODEV; - if (info->pri_enabled) - return -EBUSY; - - iommu = info->iommu; - if (!iommu) - return -EINVAL; - - /* PASID is required in PRG Response Message. */ - if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev)) - return -EINVAL; - - ret = pci_reset_pri(pdev); - if (ret) - return ret; + if (info->iopf_refcount) { + info->iopf_refcount++; + return 0; + } ret = iopf_queue_add_device(iommu->iopf_queue, dev); if (ret) return ret; - ret = context_flip_pri(info, true); - if (ret) - goto err_remove_device; - - ret = pci_enable_pri(pdev, PRQ_DEPTH); - if (ret) - goto err_clear_pri; - - info->pri_enabled = 1; + info->iopf_refcount = 1; return 0; -err_clear_pri: - context_flip_pri(info, false); -err_remove_device: - iopf_queue_remove_device(iommu->iopf_queue, dev); - - return ret; } -static int intel_iommu_disable_iopf(struct device *dev) +void intel_iommu_disable_iopf(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; - if (!info->pri_enabled) - return -EINVAL; + if (WARN_ON(!info->pri_enabled || !info->iopf_refcount)) + return; - /* Disable new PRI reception: */ - context_flip_pri(info, false); + if (--info->iopf_refcount) + return; - /* - * Remove device from fault queue and acknowledge all outstanding - * PRQs to the device: - */ iopf_queue_remove_device(iommu->iopf_queue, dev); - - /* - * PCIe spec states that by clearing PRI enable bit, the Page - * Request Interface will not issue new page requests, but has - * outstanding page requests that have been transmitted or are - * queued for transmission. This is supposed to be called after - * the device driver has stopped DMA, all PASIDs have been - * unbound and the outstanding PRQs have been drained. - */ - pci_disable_pri(to_pci_dev(dev)); - info->pri_enabled = 0; - - return 0; } static int @@ -4013,7 +3948,7 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) return intel_iommu_enable_iopf(dev); case IOMMU_DEV_FEAT_SVA: - return intel_iommu_enable_sva(dev); + return 0; default: return -ENODEV; @@ -4025,7 +3960,8 @@ intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) { switch (feat) { case IOMMU_DEV_FEAT_IOPF: - return intel_iommu_disable_iopf(dev); + intel_iommu_disable_iopf(dev); + return 0; case IOMMU_DEV_FEAT_SVA: return 0; @@ -4410,13 +4346,10 @@ static int identity_domain_attach_dev(struct iommu_domain *domain, struct device if (dev_is_real_dma_subdevice(dev)) return 0; - if (sm_supported(iommu)) { + if (sm_supported(iommu)) ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID); - if (!ret) - iommu_enable_pci_caps(info); - } else { + else ret = device_setup_pass_through(dev); - } return ret; } diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 6ea7bbe26b19..c4916886da5a 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -774,6 +774,7 @@ struct device_domain_info { u8 ats_enabled:1; u8 dtlb_extra_inval:1; /* Quirk for devices need extra flush */ u8 ats_qdep; + unsigned int iopf_refcount; struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ struct intel_iommu *iommu; /* IOMMU used by this device */ struct dmar_domain *domain; /* pointer to domain */ @@ -953,25 +954,6 @@ static inline unsigned long lvl_to_nr_pages(unsigned int lvl) return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH); } -/* VT-d pages must always be _smaller_ than MM pages. Otherwise things - are never going to work. */ -static inline unsigned long mm_to_dma_pfn_start(unsigned long mm_pfn) -{ - return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT); -} -static inline unsigned long mm_to_dma_pfn_end(unsigned long mm_pfn) -{ - return ((mm_pfn + 1) << (PAGE_SHIFT - VTD_PAGE_SHIFT)) - 1; -} -static inline unsigned long page_to_dma_pfn(struct page *pg) -{ - return mm_to_dma_pfn_start(page_to_pfn(pg)); -} -static inline unsigned long virt_to_dma_pfn(void *p) -{ - return page_to_dma_pfn(virt_to_page(p)); -} - static inline void context_set_present(struct context_entry *context) { context->lo |= 1; @@ -1304,9 +1286,8 @@ void cache_tag_flush_all(struct dmar_domain *domain); void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start, unsigned long end); -void intel_context_flush_present(struct device_domain_info *info, - struct context_entry *context, - u16 did, bool affect_domains); +void intel_context_flush_no_pasid(struct device_domain_info *info, + struct context_entry *context, u16 did); int intel_iommu_enable_prq(struct intel_iommu *iommu); int intel_iommu_finish_prq(struct intel_iommu *iommu); @@ -1314,6 +1295,9 @@ void intel_iommu_page_response(struct device *dev, struct iopf_fault *evt, struct iommu_page_response *msg); void intel_iommu_drain_pasid_prq(struct device *dev, u32 pasid); +int intel_iommu_enable_iopf(struct device *dev); +void intel_iommu_disable_iopf(struct device *dev); + #ifdef CONFIG_INTEL_IOMMU_SVM void intel_svm_check(struct intel_iommu *iommu); struct iommu_domain *intel_svm_domain_alloc(struct device *dev, diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index ad795c772f21..ea3ca5203919 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -25,11 +25,6 @@ #include "../irq_remapping.h" #include "../iommu-pages.h" -enum irq_mode { - IRQ_REMAPPING, - IRQ_POSTING, -}; - struct ioapic_scope { struct intel_iommu *iommu; unsigned int id; @@ -49,8 +44,8 @@ struct irq_2_iommu { u16 irte_index; u16 sub_handle; u8 irte_mask; - enum irq_mode mode; bool posted_msi; + bool posted_vcpu; }; struct intel_ir_data { @@ -138,7 +133,6 @@ static int alloc_irte(struct intel_iommu *iommu, irq_iommu->irte_index = index; irq_iommu->sub_handle = 0; irq_iommu->irte_mask = mask; - irq_iommu->mode = IRQ_REMAPPING; } raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); @@ -193,8 +187,6 @@ static int modify_irte(struct irq_2_iommu *irq_iommu, rc = qi_flush_iec(iommu, index, 0); - /* Update iommu mode according to the IRTE mode */ - irq_iommu->mode = irte->pst ? IRQ_POSTING : IRQ_REMAPPING; raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); return rc; @@ -1169,7 +1161,26 @@ static void intel_ir_reconfigure_irte_posted(struct irq_data *irqd) static inline void intel_ir_reconfigure_irte_posted(struct irq_data *irqd) {} #endif -static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force) +static void __intel_ir_reconfigure_irte(struct irq_data *irqd, bool force_host) +{ + struct intel_ir_data *ir_data = irqd->chip_data; + + /* + * Don't modify IRTEs for IRQs that are being posted to vCPUs if the + * host CPU affinity changes. + */ + if (ir_data->irq_2_iommu.posted_vcpu && !force_host) + return; + + ir_data->irq_2_iommu.posted_vcpu = false; + + if (ir_data->irq_2_iommu.posted_msi) + intel_ir_reconfigure_irte_posted(irqd); + else + modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry); +} + +static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force_host) { struct intel_ir_data *ir_data = irqd->chip_data; struct irte *irte = &ir_data->irte_entry; @@ -1182,10 +1193,7 @@ static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force) irte->vector = cfg->vector; irte->dest_id = IRTE_DEST(cfg->dest_apicid); - if (ir_data->irq_2_iommu.posted_msi) - intel_ir_reconfigure_irte_posted(irqd); - else if (force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING) - modify_irte(&ir_data->irq_2_iommu, irte); + __intel_ir_reconfigure_irte(irqd, force_host); } /* @@ -1240,7 +1248,7 @@ static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info) /* stop posting interrupts, back to the default mode */ if (!vcpu_pi_info) { - modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry); + __intel_ir_reconfigure_irte(data, true); } else { struct irte irte_pi; @@ -1263,6 +1271,7 @@ static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info) irte_pi.pda_h = (vcpu_pi_info->pi_desc_addr >> 32) & ~(-1UL << PDA_HIGH_BIT); + ir_data->irq_2_iommu.posted_vcpu = true; modify_irte(&ir_data->irq_2_iommu, &irte_pi); } @@ -1489,6 +1498,9 @@ static void intel_irq_remapping_deactivate(struct irq_domain *domain, struct intel_ir_data *data = irq_data->chip_data; struct irte entry; + WARN_ON_ONCE(data->irq_2_iommu.posted_vcpu); + data->irq_2_iommu.posted_vcpu = false; + memset(&entry, 0, sizeof(entry)); modify_irte(&data->irq_2_iommu, &entry); } diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index fb59a7d35958..7ee18bb48bd4 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -932,7 +932,7 @@ static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn) context_clear_entry(context); __iommu_flush_cache(iommu, context, sizeof(*context)); spin_unlock(&iommu->lock); - intel_context_flush_present(info, context, did, false); + intel_context_flush_no_pasid(info, context, did); } static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data) @@ -992,6 +992,8 @@ static int context_entry_set_pasid_table(struct context_entry *context, context_set_sm_dte(context); if (info->pasid_supported) context_set_pasid(context); + if (info->pri_supported) + context_set_sm_pre(context); context_set_fault_enable(context); context_set_present(context); @@ -1117,17 +1119,15 @@ static void __context_flush_dev_iotlb(struct device_domain_info *info) /* * Cache invalidations after change in a context table entry that was present - * according to the Spec 6.5.3.3 (Guidance to Software for Invalidations). If - * IOMMU is in scalable mode and all PASID table entries of the device were - * non-present, set flush_domains to false. Otherwise, true. + * according to the Spec 6.5.3.3 (Guidance to Software for Invalidations). + * This helper can only be used when IOMMU is working in the legacy mode or + * IOMMU is in scalable mode but all PASID table entries of the device are + * non-present. */ -void intel_context_flush_present(struct device_domain_info *info, - struct context_entry *context, - u16 did, bool flush_domains) +void intel_context_flush_no_pasid(struct device_domain_info *info, + struct context_entry *context, u16 did) { struct intel_iommu *iommu = info->iommu; - struct pasid_entry *pte; - int i; /* * Device-selective context-cache invalidation. The Domain-ID field @@ -1150,30 +1150,5 @@ void intel_context_flush_present(struct device_domain_info *info, return; } - /* - * For scalable mode: - * - Domain-selective PASID-cache invalidation to affected domains - * - Domain-selective IOTLB invalidation to affected domains - * - Global Device-TLB invalidation to affected functions - */ - if (flush_domains) { - /* - * If the IOMMU is running in scalable mode and there might - * be potential PASID translations, the caller should hold - * the lock to ensure that context changes and cache flushes - * are atomic. - */ - assert_spin_locked(&iommu->lock); - for (i = 0; i < info->pasid_table->max_pasid; i++) { - pte = intel_pasid_get_entry(info->dev, i); - if (!pte || !pasid_pte_is_present(pte)) - continue; - - did = pasid_get_domain_id(pte); - qi_flush_pasid_cache(iommu, did, QI_PC_ALL_PASIDS, 0); - iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); - } - } - __context_flush_dev_iotlb(info); } diff --git a/drivers/iommu/intel/prq.c b/drivers/iommu/intel/prq.c index 064194399b38..5b6a64d96850 100644 --- a/drivers/iommu/intel/prq.c +++ b/drivers/iommu/intel/prq.c @@ -67,7 +67,7 @@ void intel_iommu_drain_pasid_prq(struct device *dev, u32 pasid) u16 sid, did; info = dev_iommu_priv_get(dev); - if (!info->pri_enabled) + if (!info->iopf_refcount) return; iommu = info->iommu; diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index f5569347591f..ba93123cb4eb 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -110,6 +110,41 @@ static const struct mmu_notifier_ops intel_mmuops = { .free_notifier = intel_mm_free_notifier, }; +static int intel_iommu_sva_supported(struct device *dev) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu; + + if (!info || dmar_disabled) + return -EINVAL; + + iommu = info->iommu; + if (!iommu) + return -EINVAL; + + if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE)) + return -ENODEV; + + if (!info->pasid_enabled || !info->ats_enabled) + return -EINVAL; + + /* + * Devices having device-specific I/O fault handling should not + * support PCI/PRI. The IOMMU side has no means to check the + * capability of device-specific IOPF. Therefore, IOMMU can only + * default that if the device driver enables SVA on a non-PRI + * device, it will handle IOPF in its own way. + */ + if (!info->pri_supported) + return 0; + + /* Devices supporting PRI should have it enabled. */ + if (!info->pri_enabled) + return -EINVAL; + + return 0; +} + static int intel_svm_set_dev_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid, struct iommu_domain *old) @@ -121,6 +156,10 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain, unsigned long sflags; int ret = 0; + ret = intel_iommu_sva_supported(dev); + if (ret) + return ret; + dev_pasid = domain_add_dev_pasid(domain, dev, pasid); if (IS_ERR(dev_pasid)) return PTR_ERR(dev_pasid); @@ -161,6 +200,10 @@ struct iommu_domain *intel_svm_domain_alloc(struct device *dev, struct dmar_domain *domain; int ret; + ret = intel_iommu_sva_supported(dev); + if (ret) + return ERR_PTR(ret); + domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) return ERR_PTR(-ENOMEM); diff --git a/drivers/iommu/io-pgtable-dart.c b/drivers/iommu/io-pgtable-dart.c index c004640640ee..06aca9ab52f9 100644 --- a/drivers/iommu/io-pgtable-dart.c +++ b/drivers/iommu/io-pgtable-dart.c @@ -135,7 +135,6 @@ static int dart_init_pte(struct dart_io_pgtable *data, pte |= FIELD_PREP(APPLE_DART_PTE_SUBPAGE_START, 0); pte |= FIELD_PREP(APPLE_DART_PTE_SUBPAGE_END, 0xfff); - pte |= APPLE_DART1_PTE_PROT_SP_DIS; pte |= APPLE_DART_PTE_VALID; for (i = 0; i < num_entries; i++) @@ -211,6 +210,7 @@ static dart_iopte dart_prot_to_pte(struct dart_io_pgtable *data, dart_iopte pte = 0; if (data->iop.fmt == APPLE_DART) { + pte |= APPLE_DART1_PTE_PROT_SP_DIS; if (!(prot & IOMMU_WRITE)) pte |= APPLE_DART1_PTE_PROT_NO_WRITE; if (!(prot & IOMMU_READ)) diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h index de5b54eaa8bf..05fa6e682e88 100644 --- a/drivers/iommu/iommu-priv.h +++ b/drivers/iommu/iommu-priv.h @@ -17,6 +17,8 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) return dev->iommu->iommu_dev->ops; } +void dev_iommu_free(struct device *dev); + const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode); static inline const struct iommu_ops *iommu_fwspec_ops(struct iommu_fwspec *fwspec) @@ -24,8 +26,7 @@ static inline const struct iommu_ops *iommu_fwspec_ops(struct iommu_fwspec *fwsp return iommu_ops_from_fwnode(fwspec ? fwspec->iommu_fwnode : NULL); } -int iommu_group_replace_domain(struct iommu_group *group, - struct iommu_domain *new_domain); +void iommu_fwspec_free(struct device *dev); int iommu_device_register_bus(struct iommu_device *iommu, const struct iommu_ops *ops, diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 60aed01e54f2..9e1b444246f8 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -45,6 +45,9 @@ static unsigned int iommu_def_domain_type __read_mostly; static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT); static u32 iommu_cmd_line __read_mostly; +/* Tags used with xa_tag_pointer() in group->pasid_array */ +enum { IOMMU_PASID_ARRAY_DOMAIN = 0, IOMMU_PASID_ARRAY_HANDLE = 1 }; + struct iommu_group { struct kobject kobj; struct kobject *devices_kobj; @@ -352,7 +355,7 @@ static struct dev_iommu *dev_iommu_get(struct device *dev) return param; } -static void dev_iommu_free(struct device *dev) +void dev_iommu_free(struct device *dev) { struct dev_iommu *param = dev->iommu; @@ -404,14 +407,40 @@ EXPORT_SYMBOL_GPL(dev_iommu_priv_set); * Init the dev->iommu and dev->iommu_group in the struct device and get the * driver probed */ -static int iommu_init_device(struct device *dev, const struct iommu_ops *ops) +static int iommu_init_device(struct device *dev) { + const struct iommu_ops *ops; struct iommu_device *iommu_dev; struct iommu_group *group; int ret; if (!dev_iommu_get(dev)) return -ENOMEM; + /* + * For FDT-based systems and ACPI IORT/VIOT, the common firmware parsing + * is buried in the bus dma_configure path. Properly unpicking that is + * still a big job, so for now just invoke the whole thing. The device + * already having a driver bound means dma_configure has already run and + * either found no IOMMU to wait for, or we're in its replay call right + * now, so either way there's no point calling it again. + */ + if (!dev->driver && dev->bus->dma_configure) { + mutex_unlock(&iommu_probe_device_lock); + dev->bus->dma_configure(dev); + mutex_lock(&iommu_probe_device_lock); + } + /* + * At this point, relevant devices either now have a fwspec which will + * match ops registered with a non-NULL fwnode, or we can reasonably + * assume that only one of Intel, AMD, s390, PAMU or legacy SMMUv2 can + * be present, and that any of their registered instances has suitable + * ops for probing, and thus cheekily co-opt the same mechanism. + */ + ops = iommu_fwspec_ops(dev->iommu->fwspec); + if (!ops) { + ret = -ENODEV; + goto err_free; + } if (!try_module_get(ops->owner)) { ret = -EINVAL; @@ -514,23 +543,11 @@ DEFINE_MUTEX(iommu_probe_device_lock); static int __iommu_probe_device(struct device *dev, struct list_head *group_list) { - const struct iommu_ops *ops; struct iommu_group *group; struct group_device *gdev; int ret; /* - * For FDT-based systems and ACPI IORT/VIOT, drivers register IOMMU - * instances with non-NULL fwnodes, and client devices should have been - * identified with a fwspec by this point. Otherwise, we can currently - * assume that only one of Intel, AMD, s390, PAMU or legacy SMMUv2 can - * be present, and that any of their registered instances has suitable - * ops for probing, and thus cheekily co-opt the same mechanism. - */ - ops = iommu_fwspec_ops(dev_iommu_fwspec_get(dev)); - if (!ops) - return -ENODEV; - /* * Serialise to avoid races between IOMMU drivers registering in * parallel and/or the "replay" calls from ACPI/OF code via client * driver probe. Once the latter have been cleaned up we should @@ -543,9 +560,15 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list if (dev->iommu_group) return 0; - ret = iommu_init_device(dev, ops); + ret = iommu_init_device(dev); if (ret) return ret; + /* + * And if we do now see any replay calls, they would indicate someone + * misusing the dma_configure path outside bus code. + */ + if (dev->driver) + dev_WARN(dev, "late IOMMU probe at driver bind, something fishy here!\n"); group = dev->iommu_group; gdev = iommu_group_alloc_device(group, dev); @@ -2147,6 +2170,17 @@ struct iommu_domain *iommu_get_dma_domain(struct device *dev) return dev->iommu_group->default_domain; } +static void *iommu_make_pasid_array_entry(struct iommu_domain *domain, + struct iommu_attach_handle *handle) +{ + if (handle) { + handle->domain = domain; + return xa_tag_pointer(handle, IOMMU_PASID_ARRAY_HANDLE); + } + + return xa_tag_pointer(domain, IOMMU_PASID_ARRAY_DOMAIN); +} + static int __iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) { @@ -2187,32 +2221,6 @@ int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) } EXPORT_SYMBOL_GPL(iommu_attach_group); -/** - * iommu_group_replace_domain - replace the domain that a group is attached to - * @group: IOMMU group that will be attached to the new domain - * @new_domain: new IOMMU domain to replace with - * - * This API allows the group to switch domains without being forced to go to - * the blocking domain in-between. - * - * If the currently attached domain is a core domain (e.g. a default_domain), - * it will act just like the iommu_attach_group(). - */ -int iommu_group_replace_domain(struct iommu_group *group, - struct iommu_domain *new_domain) -{ - int ret; - - if (!new_domain) - return -EINVAL; - - mutex_lock(&group->mutex); - ret = __iommu_group_set_domain(group, new_domain); - mutex_unlock(&group->mutex); - return ret; -} -EXPORT_SYMBOL_NS_GPL(iommu_group_replace_domain, "IOMMUFD_INTERNAL"); - static int __iommu_device_set_domain(struct iommu_group *group, struct device *dev, struct iommu_domain *new_domain, @@ -2849,7 +2857,6 @@ void iommu_fwspec_free(struct device *dev) dev_iommu_fwspec_set(dev, NULL); } } -EXPORT_SYMBOL_GPL(iommu_fwspec_free); int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids) { @@ -3097,6 +3104,11 @@ int iommu_device_use_default_domain(struct device *dev) return 0; mutex_lock(&group->mutex); + /* We may race against bus_iommu_probe() finalising groups here */ + if (!group->default_domain) { + ret = -EPROBE_DEFER; + goto unlock_out; + } if (group->owner_cnt) { if (group->domain != group->default_domain || group->owner || !xa_empty(&group->pasid_array)) { @@ -3374,6 +3386,7 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, struct iommu_group *group = dev->iommu_group; struct group_device *device; const struct iommu_ops *ops; + void *entry; int ret; if (!group) @@ -3397,16 +3410,31 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, } } - if (handle) - handle->domain = domain; + entry = iommu_make_pasid_array_entry(domain, handle); - ret = xa_insert(&group->pasid_array, pasid, handle, GFP_KERNEL); + /* + * Entry present is a failure case. Use xa_insert() instead of + * xa_reserve(). + */ + ret = xa_insert(&group->pasid_array, pasid, XA_ZERO_ENTRY, GFP_KERNEL); if (ret) goto out_unlock; ret = __iommu_set_group_pasid(domain, group, pasid); - if (ret) - xa_erase(&group->pasid_array, pasid); + if (ret) { + xa_release(&group->pasid_array, pasid); + goto out_unlock; + } + + /* + * The xa_insert() above reserved the memory, and the group->mutex is + * held, this cannot fail. The new domain cannot be visible until the + * operation succeeds as we cannot tolerate PRIs becoming concurrently + * queued and then failing attach. + */ + WARN_ON(xa_is_err(xa_store(&group->pasid_array, + pasid, entry, GFP_KERNEL))); + out_unlock: mutex_unlock(&group->mutex); return ret; @@ -3480,13 +3508,17 @@ struct iommu_attach_handle * iommu_attach_handle_get(struct iommu_group *group, ioasid_t pasid, unsigned int type) { struct iommu_attach_handle *handle; + void *entry; xa_lock(&group->pasid_array); - handle = xa_load(&group->pasid_array, pasid); - if (!handle) + entry = xa_load(&group->pasid_array, pasid); + if (!entry || xa_pointer_tag(entry) != IOMMU_PASID_ARRAY_HANDLE) { handle = ERR_PTR(-ENOENT); - else if (type && handle->domain->type != type) - handle = ERR_PTR(-EBUSY); + } else { + handle = xa_untag_pointer(entry); + if (type && handle->domain->type != type) + handle = ERR_PTR(-EBUSY); + } xa_unlock(&group->pasid_array); return handle; @@ -3509,25 +3541,35 @@ int iommu_attach_group_handle(struct iommu_domain *domain, struct iommu_group *group, struct iommu_attach_handle *handle) { + void *entry; int ret; - if (handle) - handle->domain = domain; + if (!handle) + return -EINVAL; mutex_lock(&group->mutex); - ret = xa_insert(&group->pasid_array, IOMMU_NO_PASID, handle, GFP_KERNEL); + entry = iommu_make_pasid_array_entry(domain, handle); + ret = xa_insert(&group->pasid_array, + IOMMU_NO_PASID, XA_ZERO_ENTRY, GFP_KERNEL); if (ret) - goto err_unlock; + goto out_unlock; ret = __iommu_attach_group(domain, group); - if (ret) - goto err_erase; - mutex_unlock(&group->mutex); + if (ret) { + xa_release(&group->pasid_array, IOMMU_NO_PASID); + goto out_unlock; + } - return 0; -err_erase: - xa_erase(&group->pasid_array, IOMMU_NO_PASID); -err_unlock: + /* + * The xa_insert() above reserved the memory, and the group->mutex is + * held, this cannot fail. The new domain cannot be visible until the + * operation succeeds as we cannot tolerate PRIs becoming concurrently + * queued and then failing attach. + */ + WARN_ON(xa_is_err(xa_store(&group->pasid_array, + IOMMU_NO_PASID, entry, GFP_KERNEL))); + +out_unlock: mutex_unlock(&group->mutex); return ret; } @@ -3557,33 +3599,34 @@ EXPORT_SYMBOL_NS_GPL(iommu_detach_group_handle, "IOMMUFD_INTERNAL"); * @new_domain: new IOMMU domain to replace with * @handle: attach handle * - * This is a variant of iommu_group_replace_domain(). It allows the caller to - * provide an attach handle for the new domain and use it when the domain is - * attached. + * This API allows the group to switch domains without being forced to go to + * the blocking domain in-between. It allows the caller to provide an attach + * handle for the new domain and use it when the domain is attached. + * + * If the currently attached domain is a core domain (e.g. a default_domain), + * it will act just like the iommu_attach_group_handle(). */ int iommu_replace_group_handle(struct iommu_group *group, struct iommu_domain *new_domain, struct iommu_attach_handle *handle) { - void *curr; + void *curr, *entry; int ret; - if (!new_domain) + if (!new_domain || !handle) return -EINVAL; mutex_lock(&group->mutex); - if (handle) { - ret = xa_reserve(&group->pasid_array, IOMMU_NO_PASID, GFP_KERNEL); - if (ret) - goto err_unlock; - handle->domain = new_domain; - } + entry = iommu_make_pasid_array_entry(new_domain, handle); + ret = xa_reserve(&group->pasid_array, IOMMU_NO_PASID, GFP_KERNEL); + if (ret) + goto err_unlock; ret = __iommu_group_set_domain(group, new_domain); if (ret) goto err_release; - curr = xa_store(&group->pasid_array, IOMMU_NO_PASID, handle, GFP_KERNEL); + curr = xa_store(&group->pasid_array, IOMMU_NO_PASID, entry, GFP_KERNEL); WARN_ON(xa_is_err(curr)); mutex_unlock(&group->mutex); @@ -3596,3 +3639,32 @@ err_unlock: return ret; } EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, "IOMMUFD_INTERNAL"); + +#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU) +/** + * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain + * @desc: MSI descriptor, will store the MSI page + * @msi_addr: MSI target address to be mapped + * + * The implementation of sw_msi() should take msi_addr and map it to + * an IOVA in the domain and call msi_desc_set_iommu_msi_iova() with the + * mapping information. + * + * Return: 0 on success or negative error code if the mapping failed. + */ +int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) +{ + struct device *dev = msi_desc_to_dev(desc); + struct iommu_group *group = dev->iommu_group; + int ret = 0; + + if (!group) + return 0; + + mutex_lock(&group->mutex); + if (group->domain && group->domain->sw_msi) + ret = group->domain->sw_msi(group->domain, desc, msi_addr); + mutex_unlock(&group->mutex); + return ret; +} +#endif /* CONFIG_IRQ_MSI_IOMMU */ diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index dfd0898fb6c1..4e107f69f951 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -5,6 +5,7 @@ #include <linux/iommufd.h> #include <linux/slab.h> #include <uapi/linux/iommufd.h> +#include <linux/msi.h> #include "../iommu-priv.h" #include "io_pagetable.h" @@ -293,36 +294,152 @@ u32 iommufd_device_to_id(struct iommufd_device *idev) } EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, "IOMMUFD"); +/* + * Get a iommufd_sw_msi_map for the msi physical address requested by the irq + * layer. The mapping to IOVA is global to the iommufd file descriptor, every + * domain that is attached to a device using the same MSI parameters will use + * the same IOVA. + */ +static __maybe_unused struct iommufd_sw_msi_map * +iommufd_sw_msi_get_map(struct iommufd_ctx *ictx, phys_addr_t msi_addr, + phys_addr_t sw_msi_start) +{ + struct iommufd_sw_msi_map *cur; + unsigned int max_pgoff = 0; + + lockdep_assert_held(&ictx->sw_msi_lock); + + list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) { + if (cur->sw_msi_start != sw_msi_start) + continue; + max_pgoff = max(max_pgoff, cur->pgoff + 1); + if (cur->msi_addr == msi_addr) + return cur; + } + + if (ictx->sw_msi_id >= + BITS_PER_BYTE * sizeof_field(struct iommufd_sw_msi_maps, bitmap)) + return ERR_PTR(-EOVERFLOW); + + cur = kzalloc(sizeof(*cur), GFP_KERNEL); + if (!cur) + return ERR_PTR(-ENOMEM); + + cur->sw_msi_start = sw_msi_start; + cur->msi_addr = msi_addr; + cur->pgoff = max_pgoff; + cur->id = ictx->sw_msi_id++; + list_add_tail(&cur->sw_msi_item, &ictx->sw_msi_list); + return cur; +} + +static int iommufd_sw_msi_install(struct iommufd_ctx *ictx, + struct iommufd_hwpt_paging *hwpt_paging, + struct iommufd_sw_msi_map *msi_map) +{ + unsigned long iova; + + lockdep_assert_held(&ictx->sw_msi_lock); + + iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE; + if (!test_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap)) { + int rc; + + rc = iommu_map(hwpt_paging->common.domain, iova, + msi_map->msi_addr, PAGE_SIZE, + IOMMU_WRITE | IOMMU_READ | IOMMU_MMIO, + GFP_KERNEL_ACCOUNT); + if (rc) + return rc; + __set_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap); + } + return 0; +} + +/* + * Called by the irq code if the platform translates the MSI address through the + * IOMMU. msi_addr is the physical address of the MSI page. iommufd will + * allocate a fd global iova for the physical page that is the same on all + * domains and devices. + */ +#ifdef CONFIG_IRQ_MSI_IOMMU +int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, + phys_addr_t msi_addr) +{ + struct device *dev = msi_desc_to_dev(desc); + struct iommufd_hwpt_paging *hwpt_paging; + struct iommu_attach_handle *raw_handle; + struct iommufd_attach_handle *handle; + struct iommufd_sw_msi_map *msi_map; + struct iommufd_ctx *ictx; + unsigned long iova; + int rc; + + /* + * It is safe to call iommu_attach_handle_get() here because the iommu + * core code invokes this under the group mutex which also prevents any + * change of the attach handle for the duration of this function. + */ + iommu_group_mutex_assert(dev); + + raw_handle = + iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0); + if (IS_ERR(raw_handle)) + return 0; + hwpt_paging = find_hwpt_paging(domain->iommufd_hwpt); + + handle = to_iommufd_handle(raw_handle); + /* No IOMMU_RESV_SW_MSI means no change to the msi_msg */ + if (handle->idev->igroup->sw_msi_start == PHYS_ADDR_MAX) + return 0; + + ictx = handle->idev->ictx; + guard(mutex)(&ictx->sw_msi_lock); + /* + * The input msi_addr is the exact byte offset of the MSI doorbell, we + * assume the caller has checked that it is contained with a MMIO region + * that is secure to map at PAGE_SIZE. + */ + msi_map = iommufd_sw_msi_get_map(handle->idev->ictx, + msi_addr & PAGE_MASK, + handle->idev->igroup->sw_msi_start); + if (IS_ERR(msi_map)) + return PTR_ERR(msi_map); + + rc = iommufd_sw_msi_install(ictx, hwpt_paging, msi_map); + if (rc) + return rc; + __set_bit(msi_map->id, handle->idev->igroup->required_sw_msi.bitmap); + + iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE; + msi_desc_set_iommu_msi_iova(desc, iova, PAGE_SHIFT); + return 0; +} +#endif + static int iommufd_group_setup_msi(struct iommufd_group *igroup, struct iommufd_hwpt_paging *hwpt_paging) { - phys_addr_t sw_msi_start = igroup->sw_msi_start; - int rc; + struct iommufd_ctx *ictx = igroup->ictx; + struct iommufd_sw_msi_map *cur; + + if (igroup->sw_msi_start == PHYS_ADDR_MAX) + return 0; /* - * If the IOMMU driver gives a IOMMU_RESV_SW_MSI then it is asking us to - * call iommu_get_msi_cookie() on its behalf. This is necessary to setup - * the MSI window so iommu_dma_prepare_msi() can install pages into our - * domain after request_irq(). If it is not done interrupts will not - * work on this domain. - * - * FIXME: This is conceptually broken for iommufd since we want to allow - * userspace to change the domains, eg switch from an identity IOAS to a - * DMA IOAS. There is currently no way to create a MSI window that - * matches what the IRQ layer actually expects in a newly created - * domain. + * Install all the MSI pages the device has been using into the domain */ - if (sw_msi_start != PHYS_ADDR_MAX && !hwpt_paging->msi_cookie) { - rc = iommu_get_msi_cookie(hwpt_paging->common.domain, - sw_msi_start); + guard(mutex)(&ictx->sw_msi_lock); + list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) { + int rc; + + if (cur->sw_msi_start != igroup->sw_msi_start || + !test_bit(cur->id, igroup->required_sw_msi.bitmap)) + continue; + + rc = iommufd_sw_msi_install(ictx, hwpt_paging, cur); if (rc) return rc; - - /* - * iommu_get_msi_cookie() can only be called once per domain, - * it returns -EBUSY on later calls. - */ - hwpt_paging->msi_cookie = true; } return 0; } @@ -352,6 +469,111 @@ iommufd_device_attach_reserved_iova(struct iommufd_device *idev, return 0; } +/* The device attach/detach/replace helpers for attach_handle */ + +static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev) +{ + struct iommufd_attach_handle *handle; + int rc; + + lockdep_assert_held(&idev->igroup->lock); + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) + return -ENOMEM; + + if (hwpt->fault) { + rc = iommufd_fault_iopf_enable(idev); + if (rc) + goto out_free_handle; + } + + handle->idev = idev; + rc = iommu_attach_group_handle(hwpt->domain, idev->igroup->group, + &handle->handle); + if (rc) + goto out_disable_iopf; + + return 0; + +out_disable_iopf: + if (hwpt->fault) + iommufd_fault_iopf_disable(idev); +out_free_handle: + kfree(handle); + return rc; +} + +static struct iommufd_attach_handle * +iommufd_device_get_attach_handle(struct iommufd_device *idev) +{ + struct iommu_attach_handle *handle; + + lockdep_assert_held(&idev->igroup->lock); + + handle = + iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0); + if (IS_ERR(handle)) + return NULL; + return to_iommufd_handle(handle); +} + +static void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev) +{ + struct iommufd_attach_handle *handle; + + handle = iommufd_device_get_attach_handle(idev); + iommu_detach_group_handle(hwpt->domain, idev->igroup->group); + if (hwpt->fault) { + iommufd_auto_response_faults(hwpt, handle); + iommufd_fault_iopf_disable(idev); + } + kfree(handle); +} + +static int iommufd_hwpt_replace_device(struct iommufd_device *idev, + struct iommufd_hw_pagetable *hwpt, + struct iommufd_hw_pagetable *old) +{ + struct iommufd_attach_handle *handle, *old_handle = + iommufd_device_get_attach_handle(idev); + int rc; + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) + return -ENOMEM; + + if (hwpt->fault && !old->fault) { + rc = iommufd_fault_iopf_enable(idev); + if (rc) + goto out_free_handle; + } + + handle->idev = idev; + rc = iommu_replace_group_handle(idev->igroup->group, hwpt->domain, + &handle->handle); + if (rc) + goto out_disable_iopf; + + if (old->fault) { + iommufd_auto_response_faults(hwpt, old_handle); + if (!hwpt->fault) + iommufd_fault_iopf_disable(idev); + } + kfree(old_handle); + + return 0; + +out_disable_iopf: + if (hwpt->fault && !old->fault) + iommufd_fault_iopf_disable(idev); +out_free_handle: + kfree(handle); + return rc; +} + int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, struct iommufd_device *idev) { diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c index d9a937450e55..c48d72c9668c 100644 --- a/drivers/iommu/iommufd/fault.c +++ b/drivers/iommu/iommufd/fault.c @@ -17,7 +17,7 @@ #include "../iommu-priv.h" #include "iommufd_private.h" -static int iommufd_fault_iopf_enable(struct iommufd_device *idev) +int iommufd_fault_iopf_enable(struct iommufd_device *idev) { struct device *dev = idev->dev; int ret; @@ -50,7 +50,7 @@ static int iommufd_fault_iopf_enable(struct iommufd_device *idev) return ret; } -static void iommufd_fault_iopf_disable(struct iommufd_device *idev) +void iommufd_fault_iopf_disable(struct iommufd_device *idev) { mutex_lock(&idev->iopf_lock); if (!WARN_ON(idev->iopf_enabled == 0)) { @@ -60,46 +60,8 @@ static void iommufd_fault_iopf_disable(struct iommufd_device *idev) mutex_unlock(&idev->iopf_lock); } -static int __fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev) -{ - struct iommufd_attach_handle *handle; - int ret; - - handle = kzalloc(sizeof(*handle), GFP_KERNEL); - if (!handle) - return -ENOMEM; - - handle->idev = idev; - ret = iommu_attach_group_handle(hwpt->domain, idev->igroup->group, - &handle->handle); - if (ret) - kfree(handle); - - return ret; -} - -int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev) -{ - int ret; - - if (!hwpt->fault) - return -EINVAL; - - ret = iommufd_fault_iopf_enable(idev); - if (ret) - return ret; - - ret = __fault_domain_attach_dev(hwpt, idev); - if (ret) - iommufd_fault_iopf_disable(idev); - - return ret; -} - -static void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt, - struct iommufd_attach_handle *handle) +void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt, + struct iommufd_attach_handle *handle) { struct iommufd_fault *fault = hwpt->fault; struct iopf_group *group, *next; @@ -135,88 +97,6 @@ static void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt, mutex_unlock(&fault->mutex); } -static struct iommufd_attach_handle * -iommufd_device_get_attach_handle(struct iommufd_device *idev) -{ - struct iommu_attach_handle *handle; - - handle = iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0); - if (IS_ERR(handle)) - return NULL; - - return to_iommufd_handle(handle); -} - -void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev) -{ - struct iommufd_attach_handle *handle; - - handle = iommufd_device_get_attach_handle(idev); - iommu_detach_group_handle(hwpt->domain, idev->igroup->group); - iommufd_auto_response_faults(hwpt, handle); - iommufd_fault_iopf_disable(idev); - kfree(handle); -} - -static int __fault_domain_replace_dev(struct iommufd_device *idev, - struct iommufd_hw_pagetable *hwpt, - struct iommufd_hw_pagetable *old) -{ - struct iommufd_attach_handle *handle, *curr = NULL; - int ret; - - if (old->fault) - curr = iommufd_device_get_attach_handle(idev); - - if (hwpt->fault) { - handle = kzalloc(sizeof(*handle), GFP_KERNEL); - if (!handle) - return -ENOMEM; - - handle->idev = idev; - ret = iommu_replace_group_handle(idev->igroup->group, - hwpt->domain, &handle->handle); - } else { - ret = iommu_replace_group_handle(idev->igroup->group, - hwpt->domain, NULL); - } - - if (!ret && curr) { - iommufd_auto_response_faults(old, curr); - kfree(curr); - } - - return ret; -} - -int iommufd_fault_domain_replace_dev(struct iommufd_device *idev, - struct iommufd_hw_pagetable *hwpt, - struct iommufd_hw_pagetable *old) -{ - bool iopf_off = !hwpt->fault && old->fault; - bool iopf_on = hwpt->fault && !old->fault; - int ret; - - if (iopf_on) { - ret = iommufd_fault_iopf_enable(idev); - if (ret) - return ret; - } - - ret = __fault_domain_replace_dev(idev, hwpt, old); - if (ret) { - if (iopf_on) - iommufd_fault_iopf_disable(idev); - return ret; - } - - if (iopf_off) - iommufd_fault_iopf_disable(idev); - - return 0; -} - void iommufd_fault_destroy(struct iommufd_object *obj) { struct iommufd_fault *fault = container_of(obj, struct iommufd_fault, obj); @@ -449,7 +329,7 @@ int iommufd_fault_iopf_handler(struct iopf_group *group) struct iommufd_hw_pagetable *hwpt; struct iommufd_fault *fault; - hwpt = group->attach_handle->domain->fault_data; + hwpt = group->attach_handle->domain->iommufd_hwpt; fault = hwpt->fault; spin_lock(&fault->lock); diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 598be26a14e2..7de6e914232e 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -156,6 +156,7 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, goto out_abort; } } + iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi); /* * Set the coherency mode before we do iopt_table_add_domain() as some @@ -251,6 +252,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, goto out_abort; } hwpt->domain->owner = ops; + iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi); if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { rc = -EINVAL; @@ -307,6 +309,7 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags, goto out_abort; } hwpt->domain->owner = viommu->iommu_dev->ops; + iommu_domain_set_sw_msi(hwpt->domain, iommufd_sw_msi); if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { rc = -EINVAL; @@ -406,10 +409,10 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) } hwpt->fault = fault; hwpt->domain->iopf_handler = iommufd_fault_iopf_handler; - hwpt->domain->fault_data = hwpt; refcount_inc(&fault->obj.users); iommufd_put_object(ucmd->ictx, &fault->obj); } + hwpt->domain->iommufd_hwpt = hwpt; cmd->out_hwpt_id = hwpt->obj.id; rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 0b1bafc7fd99..246297452a44 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -19,6 +19,22 @@ struct iommu_group; struct iommu_option; struct iommufd_device; +struct iommufd_sw_msi_map { + struct list_head sw_msi_item; + phys_addr_t sw_msi_start; + phys_addr_t msi_addr; + unsigned int pgoff; + unsigned int id; +}; + +/* Bitmap of struct iommufd_sw_msi_map::id */ +struct iommufd_sw_msi_maps { + DECLARE_BITMAP(bitmap, 64); +}; + +int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, + phys_addr_t msi_addr); + struct iommufd_ctx { struct file *file; struct xarray objects; @@ -26,6 +42,10 @@ struct iommufd_ctx { wait_queue_head_t destroy_wait; struct rw_semaphore ioas_creation_lock; + struct mutex sw_msi_lock; + struct list_head sw_msi_list; + unsigned int sw_msi_id; + u8 account_mode; /* Compatibility with VFIO no iommu */ u8 no_iommu_mode; @@ -283,10 +303,10 @@ struct iommufd_hwpt_paging { struct iommufd_ioas *ioas; bool auto_domain : 1; bool enforce_cache_coherency : 1; - bool msi_cookie : 1; bool nest_parent : 1; /* Head at iommufd_ioas::hwpt_list */ struct list_head hwpt_item; + struct iommufd_sw_msi_maps present_sw_msi; }; struct iommufd_hwpt_nested { @@ -383,6 +403,7 @@ struct iommufd_group { struct iommu_group *group; struct iommufd_hw_pagetable *hwpt; struct list_head device_list; + struct iommufd_sw_msi_maps required_sw_msi; phys_addr_t sw_msi_start; }; @@ -496,43 +517,10 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd); void iommufd_fault_destroy(struct iommufd_object *obj); int iommufd_fault_iopf_handler(struct iopf_group *group); -int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev); -void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev); -int iommufd_fault_domain_replace_dev(struct iommufd_device *idev, - struct iommufd_hw_pagetable *hwpt, - struct iommufd_hw_pagetable *old); - -static inline int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev) -{ - if (hwpt->fault) - return iommufd_fault_domain_attach_dev(hwpt, idev); - - return iommu_attach_group(hwpt->domain, idev->igroup->group); -} - -static inline void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt, - struct iommufd_device *idev) -{ - if (hwpt->fault) { - iommufd_fault_domain_detach_dev(hwpt, idev); - return; - } - - iommu_detach_group(hwpt->domain, idev->igroup->group); -} - -static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev, - struct iommufd_hw_pagetable *hwpt, - struct iommufd_hw_pagetable *old) -{ - if (old->fault || hwpt->fault) - return iommufd_fault_domain_replace_dev(idev, hwpt, old); - - return iommu_group_replace_domain(idev->igroup->group, hwpt->domain); -} +int iommufd_fault_iopf_enable(struct iommufd_device *idev); +void iommufd_fault_iopf_disable(struct iommufd_device *idev); +void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt, + struct iommufd_attach_handle *handle); static inline struct iommufd_viommu * iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id) diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index ccf616462a1c..b6fa9fd11bc1 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -227,6 +227,8 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp) xa_init(&ictx->groups); ictx->file = filp; init_waitqueue_head(&ictx->destroy_wait); + mutex_init(&ictx->sw_msi_lock); + INIT_LIST_HEAD(&ictx->sw_msi_list); filp->private_data = ictx; return 0; } @@ -234,6 +236,8 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp) static int iommufd_fops_release(struct inode *inode, struct file *filp) { struct iommufd_ctx *ictx = filp->private_data; + struct iommufd_sw_msi_map *next; + struct iommufd_sw_msi_map *cur; struct iommufd_object *obj; /* @@ -262,6 +266,11 @@ static int iommufd_fops_release(struct inode *inode, struct file *filp) break; } WARN_ON(!xa_empty(&ictx->groups)); + + mutex_destroy(&ictx->sw_msi_lock); + list_for_each_entry_safe(cur, next, &ictx->sw_msi_list, sw_msi_item) + kfree(cur); + kfree(ictx); return 0; } diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index a565b9e40f4a..66824982e05f 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -27,11 +27,20 @@ #include <linux/spinlock.h> #include <linux/string_choices.h> #include <asm/barrier.h> -#include <asm/dma-iommu.h> #include <dt-bindings/memory/mtk-memory-port.h> #include <dt-bindings/memory/mt2701-larb-port.h> #include <soc/mediatek/smi.h> +#if defined(CONFIG_ARM) +#include <asm/dma-iommu.h> +#else +#define arm_iommu_create_mapping(...) NULL +#define arm_iommu_attach_device(...) -ENODEV +struct dma_iommu_mapping { + struct iommu_domain *domain; +}; +#endif + #define REG_MMU_PT_BASE_ADDR 0x000 #define F_ALL_INVLD 0x2 @@ -446,22 +455,13 @@ static int mtk_iommu_v1_create_mapping(struct device *dev, static struct iommu_device *mtk_iommu_v1_probe_device(struct device *dev) { - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + struct iommu_fwspec *fwspec = NULL; struct of_phandle_args iommu_spec; struct mtk_iommu_v1_data *data; int err, idx = 0, larbid, larbidx; struct device_link *link; struct device *larbdev; - /* - * In the deferred case, free the existed fwspec. - * Always initialize the fwspec internally. - */ - if (fwspec) { - iommu_fwspec_free(dev); - fwspec = dev_iommu_fwspec_get(dev); - } - while (!of_parse_phandle_with_args(dev->of_node, "iommus", "#iommu-cells", idx, &iommu_spec)) { @@ -476,6 +476,9 @@ static struct iommu_device *mtk_iommu_v1_probe_device(struct device *dev) idx++; } + if (!fwspec) + return ERR_PTR(-ENODEV); + data = dev_iommu_priv_get(dev); /* Link the consumer device with the smi-larb device(supplier) */ diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 97987cd78da9..6b989a62def2 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -116,6 +116,7 @@ static void of_pci_check_device_ats(struct device *dev, struct device_node *np) int of_iommu_configure(struct device *dev, struct device_node *master_np, const u32 *id) { + bool dev_iommu_present; int err; if (!master_np) @@ -127,6 +128,7 @@ int of_iommu_configure(struct device *dev, struct device_node *master_np, mutex_unlock(&iommu_probe_device_lock); return 0; } + dev_iommu_present = dev->iommu; /* * We don't currently walk up the tree looking for a parent IOMMU. @@ -147,11 +149,18 @@ int of_iommu_configure(struct device *dev, struct device_node *master_np, err = of_iommu_configure_device(master_np, dev, id); } - if (err) + if (err && dev_iommu_present) iommu_fwspec_free(dev); + else if (err && dev->iommu) + dev_iommu_free(dev); mutex_unlock(&iommu_probe_device_lock); - if (!err && dev->bus) + /* + * If we're not on the iommu_probe_device() path (as indicated by the + * initial dev->iommu) then try to simulate it. This should no longer + * happen unless of_dma_configure() is being misused outside bus code. + */ + if (!err && dev->bus && !dev_iommu_present) err = iommu_probe_device(dev); if (err && err != -EPROBE_DEFER) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 323cc665c357..af4cc91b2bbf 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -88,6 +88,7 @@ struct rk_iommu_domain { dma_addr_t dt_dma; spinlock_t iommus_lock; /* lock for iommus list */ spinlock_t dt_lock; /* lock for modifying page directory table */ + struct device *dma_dev; struct iommu_domain domain; }; @@ -123,7 +124,6 @@ struct rk_iommudata { struct rk_iommu *iommu; }; -static struct device *dma_dev; static const struct rk_iommu_ops *rk_ops; static struct iommu_domain rk_identity_domain; @@ -132,7 +132,7 @@ static inline void rk_table_flush(struct rk_iommu_domain *dom, dma_addr_t dma, { size_t size = count * sizeof(u32); /* count of u32 entry */ - dma_sync_single_for_device(dma_dev, dma, size, DMA_TO_DEVICE); + dma_sync_single_for_device(dom->dma_dev, dma, size, DMA_TO_DEVICE); } static struct rk_iommu_domain *to_rk_domain(struct iommu_domain *dom) @@ -734,9 +734,9 @@ static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain, if (!page_table) return ERR_PTR(-ENOMEM); - pt_dma = dma_map_single(dma_dev, page_table, SPAGE_SIZE, DMA_TO_DEVICE); - if (dma_mapping_error(dma_dev, pt_dma)) { - dev_err(dma_dev, "DMA mapping error while allocating page table\n"); + pt_dma = dma_map_single(rk_domain->dma_dev, page_table, SPAGE_SIZE, DMA_TO_DEVICE); + if (dma_mapping_error(rk_domain->dma_dev, pt_dma)) { + dev_err(rk_domain->dma_dev, "DMA mapping error while allocating page table\n"); iommu_free_page(page_table); return ERR_PTR(-ENOMEM); } @@ -1051,9 +1051,7 @@ static int rk_iommu_attach_device(struct iommu_domain *domain, static struct iommu_domain *rk_iommu_domain_alloc_paging(struct device *dev) { struct rk_iommu_domain *rk_domain; - - if (!dma_dev) - return NULL; + struct rk_iommu *iommu; rk_domain = kzalloc(sizeof(*rk_domain), GFP_KERNEL); if (!rk_domain) @@ -1068,10 +1066,12 @@ static struct iommu_domain *rk_iommu_domain_alloc_paging(struct device *dev) if (!rk_domain->dt) goto err_free_domain; - rk_domain->dt_dma = dma_map_single(dma_dev, rk_domain->dt, + iommu = rk_iommu_from_dev(dev); + rk_domain->dma_dev = iommu->dev; + rk_domain->dt_dma = dma_map_single(rk_domain->dma_dev, rk_domain->dt, SPAGE_SIZE, DMA_TO_DEVICE); - if (dma_mapping_error(dma_dev, rk_domain->dt_dma)) { - dev_err(dma_dev, "DMA map error for DT\n"); + if (dma_mapping_error(rk_domain->dma_dev, rk_domain->dt_dma)) { + dev_err(rk_domain->dma_dev, "DMA map error for DT\n"); goto err_free_dt; } @@ -1105,13 +1105,13 @@ static void rk_iommu_domain_free(struct iommu_domain *domain) if (rk_dte_is_pt_valid(dte)) { phys_addr_t pt_phys = rk_ops->pt_address(dte); u32 *page_table = phys_to_virt(pt_phys); - dma_unmap_single(dma_dev, pt_phys, + dma_unmap_single(rk_domain->dma_dev, pt_phys, SPAGE_SIZE, DMA_TO_DEVICE); iommu_free_page(page_table); } } - dma_unmap_single(dma_dev, rk_domain->dt_dma, + dma_unmap_single(rk_domain->dma_dev, rk_domain->dt_dma, SPAGE_SIZE, DMA_TO_DEVICE); iommu_free_page(rk_domain->dt); @@ -1148,12 +1148,12 @@ static int rk_iommu_of_xlate(struct device *dev, struct platform_device *iommu_dev; struct rk_iommudata *data; - data = devm_kzalloc(dma_dev, sizeof(*data), GFP_KERNEL); + iommu_dev = of_find_device_by_node(args->np); + + data = devm_kzalloc(&iommu_dev->dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; - iommu_dev = of_find_device_by_node(args->np); - data->iommu = platform_get_drvdata(iommu_dev); data->iommu->domain = &rk_identity_domain; dev_iommu_priv_set(dev, data); @@ -1256,22 +1256,6 @@ static int rk_iommu_probe(struct platform_device *pdev) if (err) return err; - err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev)); - if (err) - goto err_unprepare_clocks; - - err = iommu_device_register(&iommu->iommu, &rk_iommu_ops, dev); - if (err) - goto err_remove_sysfs; - - /* - * Use the first registered IOMMU device for domain to use with DMA - * API, since a domain might not physically correspond to a single - * IOMMU device.. - */ - if (!dma_dev) - dma_dev = &pdev->dev; - pm_runtime_enable(dev); for (i = 0; i < iommu->num_irq; i++) { @@ -1290,12 +1274,19 @@ static int rk_iommu_probe(struct platform_device *pdev) dma_set_mask_and_coherent(dev, rk_ops->dma_bit_mask); + err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev)); + if (err) + goto err_pm_disable; + + err = iommu_device_register(&iommu->iommu, &rk_iommu_ops, dev); + if (err) + goto err_remove_sysfs; + return 0; -err_pm_disable: - pm_runtime_disable(dev); err_remove_sysfs: iommu_device_sysfs_remove(&iommu->iommu); -err_unprepare_clocks: +err_pm_disable: + pm_runtime_disable(dev); clk_bulk_unprepare(iommu->num_clocks, iommu->clocks); return err; } diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index fbdeded3d48b..e1c76e0f9c2b 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -16,7 +16,7 @@ #include "dma-iommu.h" -static const struct iommu_ops s390_iommu_ops; +static const struct iommu_ops s390_iommu_ops, s390_iommu_rtr_ops; static struct kmem_cache *dma_region_table_cache; static struct kmem_cache *dma_page_table_cache; @@ -381,6 +381,46 @@ static void zdev_s390_domain_update(struct zpci_dev *zdev, spin_unlock_irqrestore(&zdev->dom_lock, flags); } +static int s390_iommu_domain_reg_ioat(struct zpci_dev *zdev, + struct iommu_domain *domain, u8 *status) +{ + struct s390_domain *s390_domain; + int rc = 0; + u64 iota; + + switch (domain->type) { + case IOMMU_DOMAIN_IDENTITY: + rc = zpci_register_ioat(zdev, 0, zdev->start_dma, + zdev->end_dma, 0, status); + break; + case IOMMU_DOMAIN_BLOCKED: + /* Nothing to do in this case */ + break; + default: + s390_domain = to_s390_domain(domain); + iota = virt_to_phys(s390_domain->dma_table) | + ZPCI_IOTA_RTTO_FLAG; + rc = zpci_register_ioat(zdev, 0, zdev->start_dma, + zdev->end_dma, iota, status); + } + + return rc; +} + +int zpci_iommu_register_ioat(struct zpci_dev *zdev, u8 *status) +{ + unsigned long flags; + int rc; + + spin_lock_irqsave(&zdev->dom_lock, flags); + + rc = s390_iommu_domain_reg_ioat(zdev, zdev->s390_domain, status); + + spin_unlock_irqrestore(&zdev->dom_lock, flags); + + return rc; +} + static int blocking_domain_attach_device(struct iommu_domain *domain, struct device *dev) { @@ -392,9 +432,11 @@ static int blocking_domain_attach_device(struct iommu_domain *domain, return 0; s390_domain = to_s390_domain(zdev->s390_domain); - spin_lock_irqsave(&s390_domain->list_lock, flags); - list_del_rcu(&zdev->iommu_list); - spin_unlock_irqrestore(&s390_domain->list_lock, flags); + if (zdev->dma_table) { + spin_lock_irqsave(&s390_domain->list_lock, flags); + list_del_rcu(&zdev->iommu_list); + spin_unlock_irqrestore(&s390_domain->list_lock, flags); + } zpci_unregister_ioat(zdev, 0); zdev->dma_table = NULL; @@ -422,8 +464,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, blocking_domain_attach_device(&blocking_domain, dev); /* If we fail now DMA remains blocked via blocking domain */ - cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(s390_domain->dma_table), &status); + cc = s390_iommu_domain_reg_ioat(zdev, domain, &status); if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL) return -EIO; zdev->dma_table = s390_domain->dma_table; @@ -723,7 +764,13 @@ int zpci_init_iommu(struct zpci_dev *zdev) if (rc) goto out_err; - rc = iommu_device_register(&zdev->iommu_dev, &s390_iommu_ops, NULL); + if (zdev->rtr_avail) { + rc = iommu_device_register(&zdev->iommu_dev, + &s390_iommu_rtr_ops, NULL); + } else { + rc = iommu_device_register(&zdev->iommu_dev, &s390_iommu_ops, + NULL); + } if (rc) goto out_sysfs; @@ -787,6 +834,39 @@ static int __init s390_iommu_init(void) } subsys_initcall(s390_iommu_init); +static int s390_attach_dev_identity(struct iommu_domain *domain, + struct device *dev) +{ + struct zpci_dev *zdev = to_zpci_dev(dev); + u8 status; + int cc; + + blocking_domain_attach_device(&blocking_domain, dev); + + /* If we fail now DMA remains blocked via blocking domain */ + cc = s390_iommu_domain_reg_ioat(zdev, domain, &status); + + /* + * If the device is undergoing error recovery the reset code + * will re-establish the new domain. + */ + if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL) + return -EIO; + + zdev_s390_domain_update(zdev, domain); + + return 0; +} + +static const struct iommu_domain_ops s390_identity_ops = { + .attach_dev = s390_attach_dev_identity, +}; + +static struct iommu_domain s390_identity_domain = { + .type = IOMMU_DOMAIN_IDENTITY, + .ops = &s390_identity_ops, +}; + static struct iommu_domain blocking_domain = { .type = IOMMU_DOMAIN_BLOCKED, .ops = &(const struct iommu_domain_ops) { @@ -794,23 +874,31 @@ static struct iommu_domain blocking_domain = { } }; -static const struct iommu_ops s390_iommu_ops = { - .blocked_domain = &blocking_domain, - .release_domain = &blocking_domain, - .capable = s390_iommu_capable, - .domain_alloc_paging = s390_domain_alloc_paging, - .probe_device = s390_iommu_probe_device, - .device_group = generic_device_group, - .pgsize_bitmap = SZ_4K, - .get_resv_regions = s390_iommu_get_resv_regions, - .default_domain_ops = &(const struct iommu_domain_ops) { - .attach_dev = s390_iommu_attach_device, - .map_pages = s390_iommu_map_pages, - .unmap_pages = s390_iommu_unmap_pages, - .flush_iotlb_all = s390_iommu_flush_iotlb_all, - .iotlb_sync = s390_iommu_iotlb_sync, - .iotlb_sync_map = s390_iommu_iotlb_sync_map, - .iova_to_phys = s390_iommu_iova_to_phys, - .free = s390_domain_free, +#define S390_IOMMU_COMMON_OPS() \ + .blocked_domain = &blocking_domain, \ + .release_domain = &blocking_domain, \ + .capable = s390_iommu_capable, \ + .domain_alloc_paging = s390_domain_alloc_paging, \ + .probe_device = s390_iommu_probe_device, \ + .device_group = generic_device_group, \ + .pgsize_bitmap = SZ_4K, \ + .get_resv_regions = s390_iommu_get_resv_regions, \ + .default_domain_ops = &(const struct iommu_domain_ops) { \ + .attach_dev = s390_iommu_attach_device, \ + .map_pages = s390_iommu_map_pages, \ + .unmap_pages = s390_iommu_unmap_pages, \ + .flush_iotlb_all = s390_iommu_flush_iotlb_all, \ + .iotlb_sync = s390_iommu_iotlb_sync, \ + .iotlb_sync_map = s390_iommu_iotlb_sync_map, \ + .iova_to_phys = s390_iommu_iova_to_phys, \ + .free = s390_domain_free, \ } + +static const struct iommu_ops s390_iommu_ops = { + S390_IOMMU_COMMON_OPS() +}; + +static const struct iommu_ops s390_iommu_rtr_ops = { + .identity_domain = &s390_identity_domain, + S390_IOMMU_COMMON_OPS() }; diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 7f633bb5efef..69d353e1df84 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -846,7 +846,6 @@ static int tegra_smmu_configure(struct tegra_smmu *smmu, struct device *dev, err = ops->of_xlate(dev, args); if (err < 0) { dev_err(dev, "failed to parse SW group ID: %d\n", err); - iommu_fwspec_free(dev); return err; } |