diff options
Diffstat (limited to 'drivers/iommu/intel/iommu.c')
-rw-r--r-- | drivers/iommu/intel/iommu.c | 299 |
1 files changed, 109 insertions, 190 deletions
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 79e0da9eb626..ec2f385ae25b 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -29,7 +29,6 @@ #include "../irq_remapping.h" #include "../iommu-pages.h" #include "pasid.h" -#include "cap_audit.h" #include "perfmon.h" #define ROOT_SIZE VTD_PAGE_SIZE @@ -738,7 +737,8 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, return NULL; domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); - pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; + pteval = virt_to_phys(tmp_page) | DMA_PTE_READ | + DMA_PTE_WRITE; if (domain->use_first_level) pteval |= DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; @@ -1173,32 +1173,59 @@ static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev) return true; } -static void iommu_enable_pci_caps(struct device_domain_info *info) +static void iommu_enable_pci_ats(struct device_domain_info *info) { struct pci_dev *pdev; - if (!dev_is_pci(info->dev)) + if (!info->ats_supported) return; pdev = to_pci_dev(info->dev); - if (info->ats_supported && pci_ats_page_aligned(pdev) && - !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) + if (!pci_ats_page_aligned(pdev)) + return; + + if (!pci_enable_ats(pdev, VTD_PAGE_SHIFT)) info->ats_enabled = 1; } -static void iommu_disable_pci_caps(struct device_domain_info *info) +static void iommu_disable_pci_ats(struct device_domain_info *info) +{ + if (!info->ats_enabled) + return; + + pci_disable_ats(to_pci_dev(info->dev)); + info->ats_enabled = 0; +} + +static void iommu_enable_pci_pri(struct device_domain_info *info) { struct pci_dev *pdev; - if (!dev_is_pci(info->dev)) + if (!info->ats_enabled || !info->pri_supported) return; pdev = to_pci_dev(info->dev); + /* PASID is required in PRG Response Message. */ + if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev)) + return; - if (info->ats_enabled) { - pci_disable_ats(pdev); - info->ats_enabled = 0; - } + if (pci_reset_pri(pdev)) + return; + + if (!pci_enable_pri(pdev, PRQ_DEPTH)) + info->pri_enabled = 1; +} + +static void iommu_disable_pci_pri(struct device_domain_info *info) +{ + if (!info->pri_enabled) + return; + + if (WARN_ON(info->iopf_refcount)) + iopf_queue_remove_device(info->iommu->iopf_queue, info->dev); + + pci_disable_pri(to_pci_dev(info->dev)); + info->pri_enabled = 0; } static void intel_flush_iotlb_all(struct iommu_domain *domain) @@ -1557,12 +1584,19 @@ domain_context_mapping(struct dmar_domain *domain, struct device *dev) struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; u8 bus = info->bus, devfn = info->devfn; + int ret; if (!dev_is_pci(dev)) return domain_context_mapping_one(domain, iommu, bus, devfn); - return pci_for_each_dma_alias(to_pci_dev(dev), - domain_context_mapping_cb, domain); + ret = pci_for_each_dma_alias(to_pci_dev(dev), + domain_context_mapping_cb, domain); + if (ret) + return ret; + + iommu_enable_pci_ats(info); + + return 0; } /* Return largest possible superpage level for a given mapping */ @@ -1749,7 +1783,7 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 context_clear_entry(context); __iommu_flush_cache(iommu, context, sizeof(*context)); spin_unlock(&iommu->lock); - intel_context_flush_present(info, context, did, true); + intel_context_flush_no_pasid(info, context, did); } int __domain_setup_first_level(struct intel_iommu *iommu, @@ -1844,8 +1878,6 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, if (ret) goto out_block_translation; - iommu_enable_pci_caps(info); - ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID); if (ret) goto out_block_translation; @@ -2118,10 +2150,6 @@ static int __init init_dmars(void) struct intel_iommu *iommu; int ret; - ret = intel_cap_audit(CAP_AUDIT_STATIC_DMAR, NULL); - if (ret) - goto free_iommu; - for_each_iommu(iommu, drhd) { if (drhd->ignored) { iommu_disable_translation(iommu); @@ -2617,10 +2645,6 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) struct intel_iommu *iommu = dmaru->iommu; int ret; - ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu); - if (ret) - goto out; - /* * Disable translation if already enabled prior to OS handover. */ @@ -2880,16 +2904,19 @@ void intel_iommu_shutdown(void) if (no_iommu || dmar_disabled) return; - down_write(&dmar_global_lock); + /* + * All other CPUs were brought down, hotplug interrupts were disabled, + * no lock and RCU checking needed anymore + */ + list_for_each_entry(drhd, &dmar_drhd_units, list) { + iommu = drhd->iommu; - /* Disable PMRs explicitly here. */ - for_each_iommu(iommu, drhd) + /* Disable PMRs explicitly here. */ iommu_disable_protect_mem_regions(iommu); - /* Make sure the IOMMUs are switched off */ - intel_disable_iommus(); - - up_write(&dmar_global_lock); + /* Make sure the IOMMUs are switched off */ + iommu_disable_translation(iommu); + } } static struct intel_iommu *dev_to_intel_iommu(struct device *dev) @@ -3022,6 +3049,7 @@ static int __init probe_acpi_namespace_devices(void) if (dev->bus != &acpi_bus_type) continue; + up_read(&dmar_global_lock); adev = to_acpi_device(dev); mutex_lock(&adev->physical_node_lock); list_for_each_entry(pn, @@ -3031,6 +3059,7 @@ static int __init probe_acpi_namespace_devices(void) break; } mutex_unlock(&adev->physical_node_lock); + down_read(&dmar_global_lock); if (ret) return ret; @@ -3155,7 +3184,14 @@ int __init intel_iommu_init(void) iommu_device_sysfs_add(&iommu->iommu, NULL, intel_iommu_groups, "%s", iommu->name); + /* + * The iommu device probe is protected by the iommu_probe_device_lock. + * Release the dmar_global_lock before entering the device probe path + * to avoid unnecessary lock order splat. + */ + up_read(&dmar_global_lock); iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL); + down_read(&dmar_global_lock); iommu_pmu_register(iommu); } @@ -3207,6 +3243,7 @@ static void domain_context_clear(struct device_domain_info *info) pci_for_each_dma_alias(to_pci_dev(info->dev), &domain_context_clear_one_cb, info); + iommu_disable_pci_ats(info); } /* @@ -3223,7 +3260,6 @@ void device_block_translation(struct device *dev) if (info->domain) cache_tag_unassign_domain(info->domain, dev, IOMMU_NO_PASID); - iommu_disable_pci_caps(info); if (!dev_is_real_dma_subdevice(dev)) { if (sm_supported(iommu)) intel_pasid_tear_down_entry(iommu, dev, @@ -3250,10 +3286,15 @@ static int blocking_domain_attach_dev(struct iommu_domain *domain, return 0; } +static int blocking_domain_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid, + struct iommu_domain *old); + static struct iommu_domain blocking_domain = { .type = IOMMU_DOMAIN_BLOCKED, .ops = &(const struct iommu_domain_ops) { .attach_dev = blocking_domain_attach_dev, + .set_dev_pasid = blocking_domain_set_dev_pasid, } }; @@ -3342,8 +3383,7 @@ intel_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags, bool first_stage; if (flags & - (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING - | IOMMU_HWPT_FAULT_ID_VALID))) + (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING))) return ERR_PTR(-EOPNOTSUPP); if (nested_parent && !nested_supported(iommu)) return ERR_PTR(-EOPNOTSUPP); @@ -3754,6 +3794,10 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) !pci_enable_pasid(pdev, info->pasid_supported & ~1)) info->pasid_enabled = 1; + if (sm_supported(iommu)) + iommu_enable_pci_ats(info); + iommu_enable_pci_pri(info); + return &iommu->iommu; free_table: intel_pasid_free_table(dev); @@ -3770,6 +3814,9 @@ static void intel_iommu_release_device(struct device *dev) struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; + iommu_disable_pci_pri(info); + iommu_disable_pci_ats(info); + if (info->pasid_enabled) { pci_disable_pasid(to_pci_dev(dev)); info->pasid_enabled = 0; @@ -3856,151 +3903,41 @@ static struct iommu_group *intel_iommu_device_group(struct device *dev) return generic_device_group(dev); } -static int intel_iommu_enable_sva(struct device *dev) +int intel_iommu_enable_iopf(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); - struct intel_iommu *iommu; - - if (!info || dmar_disabled) - return -EINVAL; - - iommu = info->iommu; - if (!iommu) - return -EINVAL; - - if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE)) - return -ENODEV; - - if (!info->pasid_enabled || !info->ats_enabled) - return -EINVAL; - - /* - * Devices having device-specific I/O fault handling should not - * support PCI/PRI. The IOMMU side has no means to check the - * capability of device-specific IOPF. Therefore, IOMMU can only - * default that if the device driver enables SVA on a non-PRI - * device, it will handle IOPF in its own way. - */ - if (!info->pri_supported) - return 0; - - /* Devices supporting PRI should have it enabled. */ - if (!info->pri_enabled) - return -EINVAL; - - return 0; -} - -static int context_flip_pri(struct device_domain_info *info, bool enable) -{ struct intel_iommu *iommu = info->iommu; - u8 bus = info->bus, devfn = info->devfn; - struct context_entry *context; - u16 did; - - spin_lock(&iommu->lock); - if (context_copied(iommu, bus, devfn)) { - spin_unlock(&iommu->lock); - return -EINVAL; - } - - context = iommu_context_addr(iommu, bus, devfn, false); - if (!context || !context_present(context)) { - spin_unlock(&iommu->lock); - return -ENODEV; - } - did = context_domain_id(context); - - if (enable) - context_set_sm_pre(context); - else - context_clear_sm_pre(context); - - if (!ecap_coherent(iommu->ecap)) - clflush_cache_range(context, sizeof(*context)); - intel_context_flush_present(info, context, did, true); - spin_unlock(&iommu->lock); - - return 0; -} - -static int intel_iommu_enable_iopf(struct device *dev) -{ - struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL; - struct device_domain_info *info = dev_iommu_priv_get(dev); - struct intel_iommu *iommu; int ret; - if (!pdev || !info || !info->ats_enabled || !info->pri_supported) + if (!info->pri_enabled) return -ENODEV; - if (info->pri_enabled) - return -EBUSY; - - iommu = info->iommu; - if (!iommu) - return -EINVAL; - - /* PASID is required in PRG Response Message. */ - if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev)) - return -EINVAL; - - ret = pci_reset_pri(pdev); - if (ret) - return ret; + if (info->iopf_refcount) { + info->iopf_refcount++; + return 0; + } ret = iopf_queue_add_device(iommu->iopf_queue, dev); if (ret) return ret; - ret = context_flip_pri(info, true); - if (ret) - goto err_remove_device; - - ret = pci_enable_pri(pdev, PRQ_DEPTH); - if (ret) - goto err_clear_pri; - - info->pri_enabled = 1; + info->iopf_refcount = 1; return 0; -err_clear_pri: - context_flip_pri(info, false); -err_remove_device: - iopf_queue_remove_device(iommu->iopf_queue, dev); - - return ret; } -static int intel_iommu_disable_iopf(struct device *dev) +void intel_iommu_disable_iopf(struct device *dev) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; - if (!info->pri_enabled) - return -EINVAL; + if (WARN_ON(!info->pri_enabled || !info->iopf_refcount)) + return; - /* Disable new PRI reception: */ - context_flip_pri(info, false); + if (--info->iopf_refcount) + return; - /* - * Remove device from fault queue and acknowledge all outstanding - * PRQs to the device: - */ iopf_queue_remove_device(iommu->iopf_queue, dev); - - /* - * PCIe spec states that by clearing PRI enable bit, the Page - * Request Interface will not issue new page requests, but has - * outstanding page requests that have been transmitted or are - * queued for transmission. This is supposed to be called after - * the device driver has stopped DMA, all PASIDs have been - * unbound and the outstanding PRQs have been drained. - */ - pci_disable_pri(to_pci_dev(dev)); - info->pri_enabled = 0; - - return 0; } static int @@ -4011,7 +3948,7 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) return intel_iommu_enable_iopf(dev); case IOMMU_DEV_FEAT_SVA: - return intel_iommu_enable_sva(dev); + return 0; default: return -ENODEV; @@ -4023,7 +3960,8 @@ intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) { switch (feat) { case IOMMU_DEV_FEAT_IOPF: - return intel_iommu_disable_iopf(dev); + intel_iommu_disable_iopf(dev); + return 0; case IOMMU_DEV_FEAT_SVA: return 0; @@ -4090,22 +4028,26 @@ void domain_remove_dev_pasid(struct iommu_domain *domain, break; } } - WARN_ON_ONCE(!dev_pasid); spin_unlock_irqrestore(&dmar_domain->lock, flags); cache_tag_unassign_domain(dmar_domain, dev, pasid); domain_detach_iommu(dmar_domain, iommu); - intel_iommu_debugfs_remove_dev_pasid(dev_pasid); - kfree(dev_pasid); + if (!WARN_ON_ONCE(!dev_pasid)) { + intel_iommu_debugfs_remove_dev_pasid(dev_pasid); + kfree(dev_pasid); + } } -static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid, - struct iommu_domain *domain) +static int blocking_domain_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid, + struct iommu_domain *old) { struct device_domain_info *info = dev_iommu_priv_get(dev); intel_pasid_tear_down_entry(info->iommu, dev, pasid, false); - domain_remove_dev_pasid(domain, dev, pasid); + domain_remove_dev_pasid(old, dev, pasid); + + return 0; } struct dev_pasid_info * @@ -4379,9 +4321,6 @@ static int context_setup_pass_through_cb(struct pci_dev *pdev, u16 alias, void * { struct device *dev = data; - if (dev != &pdev->dev) - return 0; - return context_setup_pass_through(dev, PCI_BUS_NUM(alias), alias & 0xff); } @@ -4407,13 +4346,10 @@ static int identity_domain_attach_dev(struct iommu_domain *domain, struct device if (dev_is_real_dma_subdevice(dev)) return 0; - if (sm_supported(iommu)) { + if (sm_supported(iommu)) ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID); - if (!ret) - iommu_enable_pci_caps(info); - } else { + else ret = device_setup_pass_through(dev); - } return ret; } @@ -4445,21 +4381,6 @@ static struct iommu_domain identity_domain = { }, }; -static struct iommu_domain *intel_iommu_domain_alloc_paging(struct device *dev) -{ - struct device_domain_info *info = dev_iommu_priv_get(dev); - struct intel_iommu *iommu = info->iommu; - struct dmar_domain *dmar_domain; - bool first_stage; - - first_stage = first_level_by_default(iommu); - dmar_domain = paging_domain_alloc(dev, first_stage); - if (IS_ERR(dmar_domain)) - return ERR_CAST(dmar_domain); - - return &dmar_domain->domain; -} - const struct iommu_ops intel_iommu_ops = { .blocked_domain = &blocking_domain, .release_domain = &blocking_domain, @@ -4468,7 +4389,6 @@ const struct iommu_ops intel_iommu_ops = { .hw_info = intel_iommu_hw_info, .domain_alloc_paging_flags = intel_iommu_domain_alloc_paging_flags, .domain_alloc_sva = intel_svm_domain_alloc, - .domain_alloc_paging = intel_iommu_domain_alloc_paging, .domain_alloc_nested = intel_iommu_domain_alloc_nested, .probe_device = intel_iommu_probe_device, .release_device = intel_iommu_release_device, @@ -4478,7 +4398,6 @@ const struct iommu_ops intel_iommu_ops = { .dev_disable_feat = intel_iommu_dev_disable_feat, .is_attach_deferred = intel_iommu_is_attach_deferred, .def_domain_type = device_def_domain_type, - .remove_dev_pasid = intel_iommu_remove_dev_pasid, .pgsize_bitmap = SZ_4K, .page_response = intel_iommu_page_response, .default_domain_ops = &(const struct iommu_domain_ops) { |