-rw-r--r--  drivers/dma/idxd/init.c                           |    2
-rw-r--r--  drivers/iommu/io-pgfault.c                        |   63
-rw-r--r--  drivers/iommu/iommu-priv.h                        |   11
-rw-r--r--  drivers/iommu/iommu-sva.c                         |   42
-rw-r--r--  drivers/iommu/iommu.c                             |  185
-rw-r--r--  drivers/iommu/iommufd/Makefile                    |    1
-rw-r--r--  drivers/iommu/iommufd/device.c                    |    7
-rw-r--r--  drivers/iommu/iommufd/fault.c                     |  433
-rw-r--r--  drivers/iommu/iommufd/hw_pagetable.c              |   38
-rw-r--r--  drivers/iommu/iommufd/iommufd_private.h           |   80
-rw-r--r--  drivers/iommu/iommufd/iommufd_test.h              |    8
-rw-r--r--  drivers/iommu/iommufd/main.c                      |    6
-rw-r--r--  drivers/iommu/iommufd/selftest.c                  |   64
-rw-r--r--  include/linux/iommu.h                             |   41
-rw-r--r--  include/uapi/linux/iommufd.h                      |  109
-rw-r--r--  tools/testing/selftests/iommu/iommufd.c           |   22
-rw-r--r--  tools/testing/selftests/iommu/iommufd_fail_nth.c  |    2
-rw-r--r--  tools/testing/selftests/iommu/iommufd_utils.h     |   86
18 files changed, 1083 insertions(+), 117 deletions(-)
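Most of this change is plumbing for a new iommufd fault-queue object: userspace allocates the queue, ties it to a hardware page table at allocation time via IOMMU_HWPT_FAULT_ID_VALID, then reads fault messages from (and writes responses to) the queue's file descriptor. Before the patch body, here is a minimal userspace sketch of that uAPI flow. It is a sketch only: the helper names are mine; iommufd, dev_id and pt_id are assumed to already exist (an open /dev/iommu fd, a bound device ID, and an IOAS or nest-parent hwpt ID); error reporting is elided; whether a given domain type accepts IOMMU_HWPT_FAULT_ID_VALID depends on the IOMMU driver; and the single-message read assumes single-fault groups as in the selftest, with batched reads shown in the loop at the end of this page.

	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <linux/iommufd.h>

	/*
	 * Allocate a fault queue, then a fault-capable hwpt bound to it.
	 * Returns the fault fd on success, -1 on failure.
	 */
	static int alloc_faultable_hwpt(int iommufd, __u32 dev_id, __u32 pt_id,
					__u32 *out_hwpt_id)
	{
		struct iommu_fault_alloc fault = { .size = sizeof(fault) };
		struct iommu_hwpt_alloc hwpt = {
			.size = sizeof(hwpt),
			.flags = IOMMU_HWPT_FAULT_ID_VALID,
			.dev_id = dev_id,
			.pt_id = pt_id,
			.data_type = IOMMU_HWPT_DATA_NONE,
		};

		if (ioctl(iommufd, IOMMU_FAULT_QUEUE_ALLOC, &fault))
			return -1;

		hwpt.fault_id = fault.out_fault_id;
		if (ioctl(iommufd, IOMMU_HWPT_ALLOC, &hwpt))
			return -1;

		*out_hwpt_id = hwpt.out_hwpt_id;
		/* Faults on this hwpt are now read()able from the fault fd */
		return fault.out_fault_fd;
	}

	/* Handle one fault: read the message, resolve it, echo the cookie. */
	static int handle_one_fault(int fault_fd)
	{
		struct iommu_hwpt_pgfault pgfault;
		struct iommu_hwpt_page_response resp = {
			.code = IOMMUFD_PAGE_RESP_SUCCESS,
		};

		/* read()/write() sizes must be struct multiples (-ESPIPE) */
		if (read(fault_fd, &pgfault, sizeof(pgfault)) != sizeof(pgfault))
			return -1;

		/* ... fix up pgfault.addr for pgfault.pasid here ... */

		resp.cookie = pgfault.cookie;	/* kernel-managed group cookie */
		if (write(fault_fd, &resp, sizeof(resp)) != sizeof(resp))
			return -1;
		return 0;
	}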
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index a7295943fa22..385c488c9cd1 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -584,7 +584,7 @@ static int idxd_enable_system_pasid(struct idxd_device *idxd) * DMA domain is owned by the driver, it should support all valid * types such as DMA-FQ, identity, etc. */ - ret = iommu_attach_device_pasid(domain, dev, pasid); + ret = iommu_attach_device_pasid(domain, dev, pasid, NULL); if (ret) { dev_err(dev, "failed to attach device pasid %d, domain type %d", pasid, domain->type); diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index 06d78fcc79fd..cd679c13752e 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -59,30 +59,6 @@ void iopf_free_group(struct iopf_group *group) } EXPORT_SYMBOL_GPL(iopf_free_group); -static struct iommu_domain *get_domain_for_iopf(struct device *dev, - struct iommu_fault *fault) -{ - struct iommu_domain *domain; - - if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) { - domain = iommu_get_domain_for_dev_pasid(dev, fault->prm.pasid, 0); - if (IS_ERR(domain)) - domain = NULL; - } else { - domain = iommu_get_domain_for_dev(dev); - } - - if (!domain || !domain->iopf_handler) { - dev_warn_ratelimited(dev, - "iopf (pasid %d) without domain attached or handler installed\n", - fault->prm.pasid); - - return NULL; - } - - return domain; -} - /* Non-last request of a group. Postpone until the last one. */ static int report_partial_fault(struct iommu_fault_param *fault_param, struct iommu_fault *fault) @@ -134,6 +110,8 @@ static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param, list_add(&group->pending_node, &iopf_param->faults); mutex_unlock(&iopf_param->lock); + group->fault_count = list_count_nodes(&group->faults); + return group; } @@ -206,20 +184,51 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) if (group == &abort_group) goto err_abort; - group->domain = get_domain_for_iopf(dev, fault); - if (!group->domain) + if (fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) { + group->attach_handle = iommu_attach_handle_get(dev->iommu_group, + fault->prm.pasid, + 0); + if (IS_ERR(group->attach_handle)) { + const struct iommu_ops *ops = dev_iommu_ops(dev); + + if (!ops->user_pasid_table) + goto err_abort; + + /* + * The iommu driver for this device supports user- + * managed PASID table. Therefore page faults for + * any PASID should go through the NESTING domain + * attached to the device RID. 
+ */ + group->attach_handle = + iommu_attach_handle_get(dev->iommu_group, + IOMMU_NO_PASID, + IOMMU_DOMAIN_NESTED); + if (IS_ERR(group->attach_handle)) + goto err_abort; + } + } else { + group->attach_handle = + iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0); + if (IS_ERR(group->attach_handle)) + goto err_abort; + } + + if (!group->attach_handle->domain->iopf_handler) goto err_abort; /* * On success iopf_handler must call iopf_group_response() and * iopf_free_group() */ - if (group->domain->iopf_handler(group)) + if (group->attach_handle->domain->iopf_handler(group)) goto err_abort; return; err_abort: + dev_warn_ratelimited(dev, "iopf with pasid %d aborted\n", + fault->prm.pasid); iopf_group_response(group, IOMMU_PAGE_RESP_FAILURE); if (group == &abort_group) __iopf_free_group(group); diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h index 5f731d994803..c37801c32f33 100644 --- a/drivers/iommu/iommu-priv.h +++ b/drivers/iommu/iommu-priv.h @@ -28,4 +28,15 @@ void iommu_device_unregister_bus(struct iommu_device *iommu, const struct bus_type *bus, struct notifier_block *nb); +struct iommu_attach_handle *iommu_attach_handle_get(struct iommu_group *group, + ioasid_t pasid, + unsigned int type); +int iommu_attach_group_handle(struct iommu_domain *domain, + struct iommu_group *group, + struct iommu_attach_handle *handle); +void iommu_detach_group_handle(struct iommu_domain *domain, + struct iommu_group *group); +int iommu_replace_group_handle(struct iommu_group *group, + struct iommu_domain *new_domain, + struct iommu_attach_handle *handle); #endif /* __LINUX_IOMMU_PRIV_H */ diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 18a35e798b72..69cde094440e 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -41,7 +41,6 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de } iommu_mm->pasid = pasid; INIT_LIST_HEAD(&iommu_mm->sva_domains); - INIT_LIST_HEAD(&iommu_mm->sva_handles); /* * Make sure the write to mm->iommu_mm is not reordered in front of * initialization to iommu_mm fields. If it does, readers may see a @@ -69,11 +68,16 @@ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct de */ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) { + struct iommu_group *group = dev->iommu_group; + struct iommu_attach_handle *attach_handle; struct iommu_mm_data *iommu_mm; struct iommu_domain *domain; struct iommu_sva *handle; int ret; + if (!group) + return ERR_PTR(-ENODEV); + mutex_lock(&iommu_sva_lock); /* Allocate mm->pasid if necessary. */ @@ -83,12 +87,22 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm goto out_unlock; } - list_for_each_entry(handle, &mm->iommu_mm->sva_handles, handle_item) { - if (handle->dev == dev) { - refcount_inc(&handle->users); - mutex_unlock(&iommu_sva_lock); - return handle; + /* A bond already exists, just take a reference. 
*/ + attach_handle = iommu_attach_handle_get(group, iommu_mm->pasid, IOMMU_DOMAIN_SVA); + if (!IS_ERR(attach_handle)) { + handle = container_of(attach_handle, struct iommu_sva, handle); + if (attach_handle->domain->mm != mm) { + ret = -EBUSY; + goto out_unlock; } + refcount_inc(&handle->users); + mutex_unlock(&iommu_sva_lock); + return handle; + } + + if (PTR_ERR(attach_handle) != -ENOENT) { + ret = PTR_ERR(attach_handle); + goto out_unlock; } handle = kzalloc(sizeof(*handle), GFP_KERNEL); @@ -99,7 +113,8 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm /* Search for an existing domain. */ list_for_each_entry(domain, &mm->iommu_mm->sva_domains, next) { - ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid); + ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid, + &handle->handle); if (!ret) { domain->users++; goto out; @@ -113,7 +128,8 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm goto out_free_handle; } - ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid); + ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid, + &handle->handle); if (ret) goto out_free_domain; domain->users = 1; @@ -121,10 +137,8 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm out: refcount_set(&handle->users, 1); - list_add(&handle->handle_item, &mm->iommu_mm->sva_handles); mutex_unlock(&iommu_sva_lock); handle->dev = dev; - handle->domain = domain; return handle; out_free_domain: @@ -147,7 +161,7 @@ EXPORT_SYMBOL_GPL(iommu_sva_bind_device); */ void iommu_sva_unbind_device(struct iommu_sva *handle) { - struct iommu_domain *domain = handle->domain; + struct iommu_domain *domain = handle->handle.domain; struct iommu_mm_data *iommu_mm = domain->mm->iommu_mm; struct device *dev = handle->dev; @@ -156,7 +170,6 @@ void iommu_sva_unbind_device(struct iommu_sva *handle) mutex_unlock(&iommu_sva_lock); return; } - list_del(&handle->handle_item); iommu_detach_device_pasid(domain, dev, iommu_mm->pasid); if (--domain->users == 0) { @@ -170,7 +183,7 @@ EXPORT_SYMBOL_GPL(iommu_sva_unbind_device); u32 iommu_sva_get_pasid(struct iommu_sva *handle) { - struct iommu_domain *domain = handle->domain; + struct iommu_domain *domain = handle->handle.domain; return mm_get_enqcmd_pasid(domain->mm); } @@ -259,7 +272,8 @@ static void iommu_sva_handle_iopf(struct work_struct *work) if (status != IOMMU_PAGE_RESP_SUCCESS) break; - status = iommu_sva_handle_mm(&iopf->fault, group->domain->mm); + status = iommu_sva_handle_mm(&iopf->fault, + group->attach_handle->domain->mm); } iopf_group_response(group, status); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 9df7cc75c1bc..8484285fbaa8 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3352,16 +3352,17 @@ static void __iommu_remove_group_pasid(struct iommu_group *group, * @domain: the iommu domain. * @dev: the attached device. * @pasid: the pasid of the device. + * @handle: the attach handle. * * Return: 0 on success, or an error. 
*/ int iommu_attach_device_pasid(struct iommu_domain *domain, - struct device *dev, ioasid_t pasid) + struct device *dev, ioasid_t pasid, + struct iommu_attach_handle *handle) { /* Caller must be a probed driver on dev */ struct iommu_group *group = dev->iommu_group; struct group_device *device; - void *curr; int ret; if (!domain->ops->set_dev_pasid) @@ -3382,11 +3383,12 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, } } - curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL); - if (curr) { - ret = xa_err(curr) ? : -EBUSY; + if (handle) + handle->domain = domain; + + ret = xa_insert(&group->pasid_array, pasid, handle, GFP_KERNEL); + if (ret) goto out_unlock; - } ret = __iommu_set_group_pasid(domain, group, pasid); if (ret) @@ -3414,46 +3416,11 @@ void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, mutex_lock(&group->mutex); __iommu_remove_group_pasid(group, pasid, domain); - WARN_ON(xa_erase(&group->pasid_array, pasid) != domain); + xa_erase(&group->pasid_array, pasid); mutex_unlock(&group->mutex); } EXPORT_SYMBOL_GPL(iommu_detach_device_pasid); -/* - * iommu_get_domain_for_dev_pasid() - Retrieve domain for @pasid of @dev - * @dev: the queried device - * @pasid: the pasid of the device - * @type: matched domain type, 0 for any match - * - * This is a variant of iommu_get_domain_for_dev(). It returns the existing - * domain attached to pasid of a device. Callers must hold a lock around this - * function, and both iommu_attach/detach_dev_pasid() whenever a domain of - * type is being manipulated. This API does not internally resolve races with - * attach/detach. - * - * Return: attached domain on success, NULL otherwise. - */ -struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev, - ioasid_t pasid, - unsigned int type) -{ - /* Caller must be a probed driver on dev */ - struct iommu_group *group = dev->iommu_group; - struct iommu_domain *domain; - - if (!group) - return NULL; - - xa_lock(&group->pasid_array); - domain = xa_load(&group->pasid_array, pasid); - if (type && domain && domain->type != type) - domain = ERR_PTR(-EBUSY); - xa_unlock(&group->pasid_array); - - return domain; -} -EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid); - ioasid_t iommu_alloc_global_pasid(struct device *dev) { int ret; @@ -3480,3 +3447,137 @@ void iommu_free_global_pasid(ioasid_t pasid) ida_free(&iommu_global_pasid_ida, pasid); } EXPORT_SYMBOL_GPL(iommu_free_global_pasid); + +/** + * iommu_attach_handle_get - Return the attach handle + * @group: the iommu group that domain was attached to + * @pasid: the pasid within the group + * @type: matched domain type, 0 for any match + * + * Return handle or ERR_PTR(-ENOENT) on none, ERR_PTR(-EBUSY) on mismatch. + * + * Return the attach handle to the caller. The life cycle of an iommu attach + * handle is from the time when the domain is attached to the time when the + * domain is detached. Callers are required to synchronize the call of + * iommu_attach_handle_get() with domain attachment and detachment. The attach + * handle can only be used during its life cycle. 
+ */ +struct iommu_attach_handle * +iommu_attach_handle_get(struct iommu_group *group, ioasid_t pasid, unsigned int type) +{ + struct iommu_attach_handle *handle; + + xa_lock(&group->pasid_array); + handle = xa_load(&group->pasid_array, pasid); + if (!handle) + handle = ERR_PTR(-ENOENT); + else if (type && handle->domain->type != type) + handle = ERR_PTR(-EBUSY); + xa_unlock(&group->pasid_array); + + return handle; +} +EXPORT_SYMBOL_NS_GPL(iommu_attach_handle_get, IOMMUFD_INTERNAL); + +/** + * iommu_attach_group_handle - Attach an IOMMU domain to an IOMMU group + * @domain: IOMMU domain to attach + * @group: IOMMU group that will be attached + * @handle: attach handle + * + * Returns 0 on success and error code on failure. + * + * This is a variant of iommu_attach_group(). It allows the caller to provide + * an attach handle and use it when the domain is attached. This is currently + * used by IOMMUFD to deliver the I/O page faults. + */ +int iommu_attach_group_handle(struct iommu_domain *domain, + struct iommu_group *group, + struct iommu_attach_handle *handle) +{ + int ret; + + if (handle) + handle->domain = domain; + + mutex_lock(&group->mutex); + ret = xa_insert(&group->pasid_array, IOMMU_NO_PASID, handle, GFP_KERNEL); + if (ret) + goto err_unlock; + + ret = __iommu_attach_group(domain, group); + if (ret) + goto err_erase; + mutex_unlock(&group->mutex); + + return 0; +err_erase: + xa_erase(&group->pasid_array, IOMMU_NO_PASID); +err_unlock: + mutex_unlock(&group->mutex); + return ret; +} +EXPORT_SYMBOL_NS_GPL(iommu_attach_group_handle, IOMMUFD_INTERNAL); + +/** + * iommu_detach_group_handle - Detach an IOMMU domain from an IOMMU group + * @domain: IOMMU domain to attach + * @group: IOMMU group that will be attached + * + * Detach the specified IOMMU domain from the specified IOMMU group. + * It must be used in conjunction with iommu_attach_group_handle(). + */ +void iommu_detach_group_handle(struct iommu_domain *domain, + struct iommu_group *group) +{ + mutex_lock(&group->mutex); + __iommu_group_set_core_domain(group); + xa_erase(&group->pasid_array, IOMMU_NO_PASID); + mutex_unlock(&group->mutex); +} +EXPORT_SYMBOL_NS_GPL(iommu_detach_group_handle, IOMMUFD_INTERNAL); + +/** + * iommu_replace_group_handle - replace the domain that a group is attached to + * @group: IOMMU group that will be attached to the new domain + * @new_domain: new IOMMU domain to replace with + * @handle: attach handle + * + * This is a variant of iommu_group_replace_domain(). It allows the caller to + * provide an attach handle for the new domain and use it when the domain is + * attached. 
+ */ +int iommu_replace_group_handle(struct iommu_group *group, + struct iommu_domain *new_domain, + struct iommu_attach_handle *handle) +{ + void *curr; + int ret; + + if (!new_domain) + return -EINVAL; + + mutex_lock(&group->mutex); + if (handle) { + ret = xa_reserve(&group->pasid_array, IOMMU_NO_PASID, GFP_KERNEL); + if (ret) + goto err_unlock; + } + + ret = __iommu_group_set_domain(group, new_domain); + if (ret) + goto err_release; + + curr = xa_store(&group->pasid_array, IOMMU_NO_PASID, handle, GFP_KERNEL); + WARN_ON(xa_is_err(curr)); + + mutex_unlock(&group->mutex); + + return 0; +err_release: + xa_release(&group->pasid_array, IOMMU_NO_PASID); +err_unlock: + mutex_unlock(&group->mutex); + return ret; +} +EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, IOMMUFD_INTERNAL); diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile index 34b446146961..cf4605962bea 100644 --- a/drivers/iommu/iommufd/Makefile +++ b/drivers/iommu/iommufd/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only iommufd-y := \ device.o \ + fault.o \ hw_pagetable.o \ io_pagetable.o \ ioas.o \ diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 873630c111c1..9a7ec5997c61 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -215,6 +215,7 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, refcount_inc(&idev->obj.users); /* igroup refcount moves into iommufd_device */ idev->igroup = igroup; + mutex_init(&idev->iopf_lock); /* * If the caller fails after this success it must call @@ -376,7 +377,7 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, * attachment. */ if (list_empty(&idev->igroup->device_list)) { - rc = iommu_attach_group(hwpt->domain, idev->igroup->group); + rc = iommufd_hwpt_attach_device(hwpt, idev); if (rc) goto err_unresv; idev->igroup->hwpt = hwpt; @@ -402,7 +403,7 @@ iommufd_hw_pagetable_detach(struct iommufd_device *idev) mutex_lock(&idev->igroup->lock); list_del(&idev->group_item); if (list_empty(&idev->igroup->device_list)) { - iommu_detach_group(hwpt->domain, idev->igroup->group); + iommufd_hwpt_detach_device(hwpt, idev); idev->igroup->hwpt = NULL; } if (hwpt_is_paging(hwpt)) @@ -497,7 +498,7 @@ iommufd_device_do_replace(struct iommufd_device *idev, goto err_unlock; } - rc = iommu_group_replace_domain(igroup->group, hwpt->domain); + rc = iommufd_hwpt_replace_device(idev, hwpt, old_hwpt); if (rc) goto err_unresv; diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c new file mode 100644 index 000000000000..54d6cd20a673 --- /dev/null +++ b/drivers/iommu/iommufd/fault.c @@ -0,0 +1,433 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2024 Intel Corporation + */ +#define pr_fmt(fmt) "iommufd: " fmt + +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/iommufd.h> +#include <linux/pci.h> +#include <linux/poll.h> +#include <linux/anon_inodes.h> +#include <uapi/linux/iommufd.h> + +#include "../iommu-priv.h" +#include "iommufd_private.h" + +static int iommufd_fault_iopf_enable(struct iommufd_device *idev) +{ + struct device *dev = idev->dev; + int ret; + + /* + * Once we turn on PCI/PRI support for VF, the response failure code + * should not be forwarded to the hardware due to PRI being a shared + * resource between PF and VFs. There is no coordination for this + * shared capability. This waits for a vPRI reset to recover. 
+ */ + if (dev_is_pci(dev) && to_pci_dev(dev)->is_virtfn) + return -EINVAL; + + mutex_lock(&idev->iopf_lock); + /* Device iopf has already been on. */ + if (++idev->iopf_enabled > 1) { + mutex_unlock(&idev->iopf_lock); + return 0; + } + + ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF); + if (ret) + --idev->iopf_enabled; + mutex_unlock(&idev->iopf_lock); + + return ret; +} + +static void iommufd_fault_iopf_disable(struct iommufd_device *idev) +{ + mutex_lock(&idev->iopf_lock); + if (!WARN_ON(idev->iopf_enabled == 0)) { + if (--idev->iopf_enabled == 0) + iommu_dev_disable_feature(idev->dev, IOMMU_DEV_FEAT_IOPF); + } + mutex_unlock(&idev->iopf_lock); +} + +static int __fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev) +{ + struct iommufd_attach_handle *handle; + int ret; + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) + return -ENOMEM; + + handle->idev = idev; + ret = iommu_attach_group_handle(hwpt->domain, idev->igroup->group, + &handle->handle); + if (ret) + kfree(handle); + + return ret; +} + +int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev) +{ + int ret; + + if (!hwpt->fault) + return -EINVAL; + + ret = iommufd_fault_iopf_enable(idev); + if (ret) + return ret; + + ret = __fault_domain_attach_dev(hwpt, idev); + if (ret) + iommufd_fault_iopf_disable(idev); + + return ret; +} + +static void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt, + struct iommufd_attach_handle *handle) +{ + struct iommufd_fault *fault = hwpt->fault; + struct iopf_group *group, *next; + unsigned long index; + + if (!fault) + return; + + mutex_lock(&fault->mutex); + list_for_each_entry_safe(group, next, &fault->deliver, node) { + if (group->attach_handle != &handle->handle) + continue; + list_del(&group->node); + iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); + iopf_free_group(group); + } + + xa_for_each(&fault->response, index, group) { + if (group->attach_handle != &handle->handle) + continue; + xa_erase(&fault->response, index); + iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); + iopf_free_group(group); + } + mutex_unlock(&fault->mutex); +} + +static struct iommufd_attach_handle * +iommufd_device_get_attach_handle(struct iommufd_device *idev) +{ + struct iommu_attach_handle *handle; + + handle = iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0); + if (!handle) + return NULL; + + return to_iommufd_handle(handle); +} + +void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev) +{ + struct iommufd_attach_handle *handle; + + handle = iommufd_device_get_attach_handle(idev); + iommu_detach_group_handle(hwpt->domain, idev->igroup->group); + iommufd_auto_response_faults(hwpt, handle); + iommufd_fault_iopf_disable(idev); + kfree(handle); +} + +static int __fault_domain_replace_dev(struct iommufd_device *idev, + struct iommufd_hw_pagetable *hwpt, + struct iommufd_hw_pagetable *old) +{ + struct iommufd_attach_handle *handle, *curr = NULL; + int ret; + + if (old->fault) + curr = iommufd_device_get_attach_handle(idev); + + if (hwpt->fault) { + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) + return -ENOMEM; + + handle->handle.domain = hwpt->domain; + handle->idev = idev; + ret = iommu_replace_group_handle(idev->igroup->group, + hwpt->domain, &handle->handle); + } else { + ret = iommu_replace_group_handle(idev->igroup->group, + hwpt->domain, NULL); + } + + if (!ret && curr) { + iommufd_auto_response_faults(old, 
curr); + kfree(curr); + } + + return ret; +} + +int iommufd_fault_domain_replace_dev(struct iommufd_device *idev, + struct iommufd_hw_pagetable *hwpt, + struct iommufd_hw_pagetable *old) +{ + bool iopf_off = !hwpt->fault && old->fault; + bool iopf_on = hwpt->fault && !old->fault; + int ret; + + if (iopf_on) { + ret = iommufd_fault_iopf_enable(idev); + if (ret) + return ret; + } + + ret = __fault_domain_replace_dev(idev, hwpt, old); + if (ret) { + if (iopf_on) + iommufd_fault_iopf_disable(idev); + return ret; + } + + if (iopf_off) + iommufd_fault_iopf_disable(idev); + + return 0; +} + +void iommufd_fault_destroy(struct iommufd_object *obj) +{ + struct iommufd_fault *fault = container_of(obj, struct iommufd_fault, obj); + struct iopf_group *group, *next; + + /* + * The iommufd object's reference count is zero at this point. + * We can be confident that no other threads are currently + * accessing this pointer. Therefore, acquiring the mutex here + * is unnecessary. + */ + list_for_each_entry_safe(group, next, &fault->deliver, node) { + list_del(&group->node); + iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); + iopf_free_group(group); + } +} + +static void iommufd_compose_fault_message(struct iommu_fault *fault, + struct iommu_hwpt_pgfault *hwpt_fault, + struct iommufd_device *idev, + u32 cookie) +{ + hwpt_fault->flags = fault->prm.flags; + hwpt_fault->dev_id = idev->obj.id; + hwpt_fault->pasid = fault->prm.pasid; + hwpt_fault->grpid = fault->prm.grpid; + hwpt_fault->perm = fault->prm.perm; + hwpt_fault->addr = fault->prm.addr; + hwpt_fault->length = 0; + hwpt_fault->cookie = cookie; +} + +static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf, + size_t count, loff_t *ppos) +{ + size_t fault_size = sizeof(struct iommu_hwpt_pgfault); + struct iommufd_fault *fault = filep->private_data; + struct iommu_hwpt_pgfault data; + struct iommufd_device *idev; + struct iopf_group *group; + struct iopf_fault *iopf; + size_t done = 0; + int rc = 0; + + if (*ppos || count % fault_size) + return -ESPIPE; + + mutex_lock(&fault->mutex); + while (!list_empty(&fault->deliver) && count > done) { + group = list_first_entry(&fault->deliver, + struct iopf_group, node); + + if (group->fault_count * fault_size > count - done) + break; + + rc = xa_alloc(&fault->response, &group->cookie, group, + xa_limit_32b, GFP_KERNEL); + if (rc) + break; + + idev = to_iommufd_handle(group->attach_handle)->idev; + list_for_each_entry(iopf, &group->faults, list) { + iommufd_compose_fault_message(&iopf->fault, + &data, idev, + group->cookie); + if (copy_to_user(buf + done, &data, fault_size)) { + xa_erase(&fault->response, group->cookie); + rc = -EFAULT; + break; + } + done += fault_size; + } + + list_del(&group->node); + } + mutex_unlock(&fault->mutex); + + return done == 0 ? 
rc : done; +} + +static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *buf, + size_t count, loff_t *ppos) +{ + size_t response_size = sizeof(struct iommu_hwpt_page_response); + struct iommufd_fault *fault = filep->private_data; + struct iommu_hwpt_page_response response; + struct iopf_group *group; + size_t done = 0; + int rc = 0; + + if (*ppos || count % response_size) + return -ESPIPE; + + mutex_lock(&fault->mutex); + while (count > done) { + rc = copy_from_user(&response, buf + done, response_size); + if (rc) + break; + + group = xa_erase(&fault->response, response.cookie); + if (!group) { + rc = -EINVAL; + break; + } + + iopf_group_response(group, response.code); + iopf_free_group(group); + done += response_size; + } + mutex_unlock(&fault->mutex); + + return done == 0 ? rc : done; +} + +static __poll_t iommufd_fault_fops_poll(struct file *filep, + struct poll_table_struct *wait) +{ + struct iommufd_fault *fault = filep->private_data; + __poll_t pollflags = EPOLLOUT; + + poll_wait(filep, &fault->wait_queue, wait); + mutex_lock(&fault->mutex); + if (!list_empty(&fault->deliver)) + pollflags |= EPOLLIN | EPOLLRDNORM; + mutex_unlock(&fault->mutex); + + return pollflags; +} + +static int iommufd_fault_fops_release(struct inode *inode, struct file *filep) +{ + struct iommufd_fault *fault = filep->private_data; + + refcount_dec(&fault->obj.users); + iommufd_ctx_put(fault->ictx); + return 0; +} + +static const struct file_operations iommufd_fault_fops = { + .owner = THIS_MODULE, + .open = nonseekable_open, + .read = iommufd_fault_fops_read, + .write = iommufd_fault_fops_write, + .poll = iommufd_fault_fops_poll, + .release = iommufd_fault_fops_release, + .llseek = no_llseek, +}; + +int iommufd_fault_alloc(struct iommufd_ucmd *ucmd) +{ + struct iommu_fault_alloc *cmd = ucmd->cmd; + struct iommufd_fault *fault; + struct file *filep; + int fdno; + int rc; + + if (cmd->flags) + return -EOPNOTSUPP; + + fault = iommufd_object_alloc(ucmd->ictx, fault, IOMMUFD_OBJ_FAULT); + if (IS_ERR(fault)) + return PTR_ERR(fault); + + fault->ictx = ucmd->ictx; + INIT_LIST_HEAD(&fault->deliver); + xa_init_flags(&fault->response, XA_FLAGS_ALLOC1); + mutex_init(&fault->mutex); + init_waitqueue_head(&fault->wait_queue); + + filep = anon_inode_getfile("[iommufd-pgfault]", &iommufd_fault_fops, + fault, O_RDWR); + if (IS_ERR(filep)) { + rc = PTR_ERR(filep); + goto out_abort; + } + + refcount_inc(&fault->obj.users); + iommufd_ctx_get(fault->ictx); + fault->filep = filep; + + fdno = get_unused_fd_flags(O_CLOEXEC); + if (fdno < 0) { + rc = fdno; + goto out_fput; + } + + cmd->out_fault_id = fault->obj.id; + cmd->out_fault_fd = fdno; + + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + goto out_put_fdno; + iommufd_object_finalize(ucmd->ictx, &fault->obj); + + fd_install(fdno, fault->filep); + + return 0; +out_put_fdno: + put_unused_fd(fdno); +out_fput: + fput(filep); + refcount_dec(&fault->obj.users); + iommufd_ctx_put(fault->ictx); +out_abort: + iommufd_object_abort_and_destroy(ucmd->ictx, &fault->obj); + + return rc; +} + +int iommufd_fault_iopf_handler(struct iopf_group *group) +{ + struct iommufd_hw_pagetable *hwpt; + struct iommufd_fault *fault; + + hwpt = group->attach_handle->domain->fault_data; + fault = hwpt->fault; + + mutex_lock(&fault->mutex); + list_add_tail(&group->node, &fault->deliver); + mutex_unlock(&fault->mutex); + + wake_up_interruptible(&fault->wait_queue); + + return 0; +} diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 
33d142f8057d..e63f80f087d1 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -8,6 +8,15 @@ #include "../iommu-priv.h" #include "iommufd_private.h" +static void __iommufd_hwpt_destroy(struct iommufd_hw_pagetable *hwpt) +{ + if (hwpt->domain) + iommu_domain_free(hwpt->domain); + + if (hwpt->fault) + refcount_dec(&hwpt->fault->obj.users); +} + void iommufd_hwpt_paging_destroy(struct iommufd_object *obj) { struct iommufd_hwpt_paging *hwpt_paging = @@ -22,9 +31,7 @@ void iommufd_hwpt_paging_destroy(struct iommufd_object *obj) hwpt_paging->common.domain); } - if (hwpt_paging->common.domain) - iommu_domain_free(hwpt_paging->common.domain); - + __iommufd_hwpt_destroy(&hwpt_paging->common); refcount_dec(&hwpt_paging->ioas->obj.users); } @@ -49,9 +56,7 @@ void iommufd_hwpt_nested_destroy(struct iommufd_object *obj) struct iommufd_hwpt_nested *hwpt_nested = container_of(obj, struct iommufd_hwpt_nested, common.obj); - if (hwpt_nested->common.domain) - iommu_domain_free(hwpt_nested->common.domain); - + __iommufd_hwpt_destroy(&hwpt_nested->common); refcount_dec(&hwpt_nested->parent->common.obj.users); } @@ -213,7 +218,8 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, struct iommufd_hw_pagetable *hwpt; int rc; - if (flags || !user_data->len || !ops->domain_alloc_user) + if ((flags & ~IOMMU_HWPT_FAULT_ID_VALID) || + !user_data->len || !ops->domain_alloc_user) return ERR_PTR(-EOPNOTSUPP); if (parent->auto_domain || !parent->nest_parent) return ERR_PTR(-EINVAL); @@ -227,7 +233,8 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, refcount_inc(&parent->common.obj.users); hwpt_nested->parent = parent; - hwpt->domain = ops->domain_alloc_user(idev->dev, flags, + hwpt->domain = ops->domain_alloc_user(idev->dev, + flags & ~IOMMU_HWPT_FAULT_ID_VALID, parent->common.domain, user_data); if (IS_ERR(hwpt->domain)) { rc = PTR_ERR(hwpt->domain); @@ -308,6 +315,21 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) goto out_put_pt; } + if (cmd->flags & IOMMU_HWPT_FAULT_ID_VALID) { + struct iommufd_fault *fault; + + fault = iommufd_get_fault(ucmd, cmd->fault_id); + if (IS_ERR(fault)) { + rc = PTR_ERR(fault); + goto out_hwpt; + } + hwpt->fault = fault; + hwpt->domain->iopf_handler = iommufd_fault_iopf_handler; + hwpt->domain->fault_data = hwpt; + refcount_inc(&fault->obj.users); + iommufd_put_object(ucmd->ictx, &fault->obj); + } + cmd->out_hwpt_id = hwpt->obj.id; rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); if (rc) diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 991f864d1f9b..92efe30a8f0d 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -11,6 +11,7 @@ #include <linux/iommu.h> #include <linux/iova_bitmap.h> #include <uapi/linux/iommufd.h> +#include "../iommu-priv.h" struct iommu_domain; struct iommu_group; @@ -128,6 +129,7 @@ enum iommufd_object_type { IOMMUFD_OBJ_HWPT_NESTED, IOMMUFD_OBJ_IOAS, IOMMUFD_OBJ_ACCESS, + IOMMUFD_OBJ_FAULT, #ifdef CONFIG_IOMMUFD_TEST IOMMUFD_OBJ_SELFTEST, #endif @@ -292,6 +294,7 @@ int iommufd_check_iova_range(struct io_pagetable *iopt, struct iommufd_hw_pagetable { struct iommufd_object obj; struct iommu_domain *domain; + struct iommufd_fault *fault; }; struct iommufd_hwpt_paging { @@ -395,6 +398,9 @@ struct iommufd_device { /* always the physical device */ struct device *dev; bool enforce_cache_coherency; + /* protect iopf_enabled counter */ + struct mutex iopf_lock; + unsigned int iopf_enabled; }; static inline struct 
iommufd_device * @@ -426,6 +432,80 @@ void iopt_remove_access(struct io_pagetable *iopt, u32 iopt_access_list_id); void iommufd_access_destroy_object(struct iommufd_object *obj); +/* + * An iommufd_fault object represents an interface to deliver I/O page faults + * to the user space. These objects are created/destroyed by the user space and + * associated with hardware page table objects during page-table allocation. + */ +struct iommufd_fault { + struct iommufd_object obj; + struct iommufd_ctx *ictx; + struct file *filep; + + /* The lists of outstanding faults protected by below mutex. */ + struct mutex mutex; + struct list_head deliver; + struct xarray response; + + struct wait_queue_head wait_queue; +}; + +struct iommufd_attach_handle { + struct iommu_attach_handle handle; + struct iommufd_device *idev; +}; + +/* Convert an iommu attach handle to iommufd handle. */ +#define to_iommufd_handle(hdl) container_of(hdl, struct iommufd_attach_handle, handle) + +static inline struct iommufd_fault * +iommufd_get_fault(struct iommufd_ucmd *ucmd, u32 id) +{ + return container_of(iommufd_get_object(ucmd->ictx, id, + IOMMUFD_OBJ_FAULT), + struct iommufd_fault, obj); +} + +int iommufd_fault_alloc(struct iommufd_ucmd *ucmd); +void iommufd_fault_destroy(struct iommufd_object *obj); +int iommufd_fault_iopf_handler(struct iopf_group *group); + +int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev); +void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev); +int iommufd_fault_domain_replace_dev(struct iommufd_device *idev, + struct iommufd_hw_pagetable *hwpt, + struct iommufd_hw_pagetable *old); + +static inline int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev) +{ + if (hwpt->fault) + return iommufd_fault_domain_attach_dev(hwpt, idev); + + return iommu_attach_group(hwpt->domain, idev->igroup->group); +} + +static inline void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev) +{ + if (hwpt->fault) + iommufd_fault_domain_detach_dev(hwpt, idev); + + iommu_detach_group(hwpt->domain, idev->igroup->group); +} + +static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev, + struct iommufd_hw_pagetable *hwpt, + struct iommufd_hw_pagetable *old) +{ + if (old->fault || hwpt->fault) + return iommufd_fault_domain_replace_dev(idev, hwpt, old); + + return iommu_group_replace_domain(idev->igroup->group, hwpt->domain); +} + #ifdef CONFIG_IOMMUFD_TEST int iommufd_test(struct iommufd_ucmd *ucmd); void iommufd_selftest_destroy(struct iommufd_object *obj); diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h index e854d3f67205..acbbba1c6671 100644 --- a/drivers/iommu/iommufd/iommufd_test.h +++ b/drivers/iommu/iommufd/iommufd_test.h @@ -22,6 +22,7 @@ enum { IOMMU_TEST_OP_MOCK_DOMAIN_FLAGS, IOMMU_TEST_OP_DIRTY, IOMMU_TEST_OP_MD_CHECK_IOTLB, + IOMMU_TEST_OP_TRIGGER_IOPF, }; enum { @@ -127,6 +128,13 @@ struct iommu_test_cmd { __u32 id; __u32 iotlb; } check_iotlb; + struct { + __u32 dev_id; + __u32 pasid; + __u32 grpid; + __u32 perm; + __u64 addr; + } trigger_iopf; }; __u32 last; }; diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 39b32932c61e..83bbd7c5d160 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -319,6 +319,7 @@ static int iommufd_option(struct iommufd_ucmd *ucmd) union ucmd_buffer { struct iommu_destroy destroy; + struct 
iommu_fault_alloc fault; struct iommu_hw_info info; struct iommu_hwpt_alloc hwpt; struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap; @@ -355,6 +356,8 @@ struct iommufd_ioctl_op { } static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id), + IOCTL_OP(IOMMU_FAULT_QUEUE_ALLOC, iommufd_fault_alloc, struct iommu_fault_alloc, + out_fault_fd), IOCTL_OP(IOMMU_GET_HW_INFO, iommufd_get_hw_info, struct iommu_hw_info, __reserved), IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc, @@ -513,6 +516,9 @@ static const struct iommufd_object_ops iommufd_object_ops[] = { .destroy = iommufd_hwpt_nested_destroy, .abort = iommufd_hwpt_nested_abort, }, + [IOMMUFD_OBJ_FAULT] = { + .destroy = iommufd_fault_destroy, + }, #ifdef CONFIG_IOMMUFD_TEST [IOMMUFD_OBJ_SELFTEST] = { .destroy = iommufd_selftest_destroy, diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 7a70a3e0fee6..f95e32e29133 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -504,6 +504,8 @@ static bool mock_domain_capable(struct device *dev, enum iommu_cap cap) return false; } +static struct iopf_queue *mock_iommu_iopf_queue; + static struct iommu_device mock_iommu_device = { }; @@ -514,6 +516,29 @@ static struct iommu_device *mock_probe_device(struct device *dev) return &mock_iommu_device; } +static void mock_domain_page_response(struct device *dev, struct iopf_fault *evt, + struct iommu_page_response *msg) +{ +} + +static int mock_dev_enable_feat(struct device *dev, enum iommu_dev_features feat) +{ + if (feat != IOMMU_DEV_FEAT_IOPF || !mock_iommu_iopf_queue) + return -ENODEV; + + return iopf_queue_add_device(mock_iommu_iopf_queue, dev); +} + +static int mock_dev_disable_feat(struct device *dev, enum iommu_dev_features feat) +{ + if (feat != IOMMU_DEV_FEAT_IOPF || !mock_iommu_iopf_queue) + return -ENODEV; + + iopf_queue_remove_device(mock_iommu_iopf_queue, dev); + + return 0; +} + static const struct iommu_ops mock_ops = { /* * IOMMU_DOMAIN_BLOCKED cannot be returned from def_domain_type() @@ -529,6 +554,10 @@ static const struct iommu_ops mock_ops = { .capable = mock_domain_capable, .device_group = generic_device_group, .probe_device = mock_probe_device, + .page_response = mock_domain_page_response, + .dev_enable_feat = mock_dev_enable_feat, + .dev_disable_feat = mock_dev_disable_feat, + .user_pasid_table = true, .default_domain_ops = &(struct iommu_domain_ops){ .free = mock_domain_free, @@ -1375,6 +1404,31 @@ out_put: return rc; } +static int iommufd_test_trigger_iopf(struct iommufd_ucmd *ucmd, + struct iommu_test_cmd *cmd) +{ + struct iopf_fault event = { }; + struct iommufd_device *idev; + + idev = iommufd_get_device(ucmd, cmd->trigger_iopf.dev_id); + if (IS_ERR(idev)) + return PTR_ERR(idev); + + event.fault.prm.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; + if (cmd->trigger_iopf.pasid != IOMMU_NO_PASID) + event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; + event.fault.type = IOMMU_FAULT_PAGE_REQ; + event.fault.prm.addr = cmd->trigger_iopf.addr; + event.fault.prm.pasid = cmd->trigger_iopf.pasid; + event.fault.prm.grpid = cmd->trigger_iopf.grpid; + event.fault.prm.perm = cmd->trigger_iopf.perm; + + iommu_report_device_fault(idev->dev, &event); + iommufd_put_object(ucmd->ictx, &idev->obj); + + return 0; +} + void iommufd_selftest_destroy(struct iommufd_object *obj) { struct selftest_obj *sobj = container_of(obj, struct selftest_obj, obj); @@ -1450,6 +1504,8 @@ int 
iommufd_test(struct iommufd_ucmd *ucmd) cmd->dirty.page_size, u64_to_user_ptr(cmd->dirty.uptr), cmd->dirty.flags); + case IOMMU_TEST_OP_TRIGGER_IOPF: + return iommufd_test_trigger_iopf(ucmd, cmd); default: return -EOPNOTSUPP; } @@ -1491,6 +1547,9 @@ int __init iommufd_test_init(void) &iommufd_mock_bus_type.nb); if (rc) goto err_sysfs; + + mock_iommu_iopf_queue = iopf_queue_alloc("mock-iopfq"); + return 0; err_sysfs: @@ -1506,6 +1565,11 @@ err_dbgfs: void iommufd_test_exit(void) { + if (mock_iommu_iopf_queue) { + iopf_queue_free(mock_iommu_iopf_queue); + mock_iommu_iopf_queue = NULL; + } + iommu_device_sysfs_remove(&mock_iommu_device); iommu_device_unregister_bus(&mock_iommu_device, &iommufd_mock_bus_type.bus, diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 17b3f36ad843..73bc3aee95a1 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -124,12 +124,16 @@ struct iopf_fault { struct iopf_group { struct iopf_fault last_fault; struct list_head faults; + size_t fault_count; /* list node for iommu_fault_param::faults */ struct list_head pending_node; struct work_struct work; - struct iommu_domain *domain; + struct iommu_attach_handle *attach_handle; /* The device's fault data parameter. */ struct iommu_fault_param *fault_param; + /* Used by handler provider to hook the group on its own lists. */ + struct list_head node; + u32 cookie; }; /** @@ -547,6 +551,10 @@ static inline int __iommu_copy_struct_from_user_array( * @default_domain: If not NULL this will always be set as the default domain. * This should be an IDENTITY/BLOCKED/PLATFORM domain. * Do not use in new drivers. + * @user_pasid_table: IOMMU driver supports user-managed PASID table. There is + * no user domain for each PASID and the I/O page faults are + * forwarded through the user domain attached to the device + * RID. */ struct iommu_ops { bool (*capable)(struct device *dev, enum iommu_cap); @@ -590,6 +598,7 @@ struct iommu_ops { struct iommu_domain *blocked_domain; struct iommu_domain *release_domain; struct iommu_domain *default_domain; + u8 user_pasid_table:1; }; /** @@ -989,20 +998,28 @@ struct iommu_fwspec { /* ATS is supported */ #define IOMMU_FWSPEC_PCI_RC_ATS (1 << 0) +/* + * An iommu attach handle represents a relationship between an iommu domain + * and a PASID or RID of a device. It is allocated and managed by the component + * that manages the domain and is stored in the iommu group during the time the + * domain is attached. 
+ */ +struct iommu_attach_handle { + struct iommu_domain *domain; +}; + /** * struct iommu_sva - handle to a device-mm bond */ struct iommu_sva { + struct iommu_attach_handle handle; struct device *dev; - struct iommu_domain *domain; - struct list_head handle_item; refcount_t users; }; struct iommu_mm_data { u32 pasid; struct list_head sva_domains; - struct list_head sva_handles; }; int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, @@ -1052,12 +1069,10 @@ int iommu_device_claim_dma_owner(struct device *dev, void *owner); void iommu_device_release_dma_owner(struct device *dev); int iommu_attach_device_pasid(struct iommu_domain *domain, - struct device *dev, ioasid_t pasid); + struct device *dev, ioasid_t pasid, + struct iommu_attach_handle *handle); void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); -struct iommu_domain * -iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, - unsigned int type); ioasid_t iommu_alloc_global_pasid(struct device *dev); void iommu_free_global_pasid(ioasid_t pasid); #else /* CONFIG_IOMMU_API */ @@ -1388,7 +1403,8 @@ static inline int iommu_device_claim_dma_owner(struct device *dev, void *owner) } static inline int iommu_attach_device_pasid(struct iommu_domain *domain, - struct device *dev, ioasid_t pasid) + struct device *dev, ioasid_t pasid, + struct iommu_attach_handle *handle) { return -ENODEV; } @@ -1398,13 +1414,6 @@ static inline void iommu_detach_device_pasid(struct iommu_domain *domain, { } -static inline struct iommu_domain * -iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, - unsigned int type) -{ - return NULL; -} - static inline ioasid_t iommu_alloc_global_pasid(struct device *dev) { return IOMMU_PASID_INVALID; diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 1dfeaa2e649e..ede2b464a761 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -50,6 +50,7 @@ enum { IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING, IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP, IOMMUFD_CMD_HWPT_INVALIDATE, + IOMMUFD_CMD_FAULT_QUEUE_ALLOC, }; /** @@ -356,10 +357,13 @@ struct iommu_vfio_ioas { * the parent HWPT in a nesting configuration. * @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is * enforced on device attachment + * @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is + * valid. */ enum iommufd_hwpt_alloc_flags { IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0, IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1, + IOMMU_HWPT_FAULT_ID_VALID = 1 << 2, }; /** @@ -411,6 +415,9 @@ enum iommu_hwpt_data_type { * @data_type: One of enum iommu_hwpt_data_type * @data_len: Length of the type specific data * @data_uptr: User pointer to the type specific data + * @fault_id: The ID of IOMMUFD_FAULT object. Valid only if flags field of + * IOMMU_HWPT_FAULT_ID_VALID is set. + * @__reserved2: Padding to 64-bit alignment. Must be 0. * * Explicitly allocate a hardware page table object. 
This is the same object * type that is returned by iommufd_device_attach() and represents the @@ -441,6 +448,8 @@ struct iommu_hwpt_alloc { __u32 data_type; __u32 data_len; __aligned_u64 data_uptr; + __u32 fault_id; + __u32 __reserved2; }; #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC) @@ -692,4 +701,104 @@ struct iommu_hwpt_invalidate { __u32 __reserved; }; #define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE) + +/** + * enum iommu_hwpt_pgfault_flags - flags for struct iommu_hwpt_pgfault + * @IOMMU_PGFAULT_FLAGS_PASID_VALID: The pasid field of the fault data is + * valid. + * @IOMMU_PGFAULT_FLAGS_LAST_PAGE: It's the last fault of a fault group. + */ +enum iommu_hwpt_pgfault_flags { + IOMMU_PGFAULT_FLAGS_PASID_VALID = (1 << 0), + IOMMU_PGFAULT_FLAGS_LAST_PAGE = (1 << 1), +}; + +/** + * enum iommu_hwpt_pgfault_perm - perm bits for struct iommu_hwpt_pgfault + * @IOMMU_PGFAULT_PERM_READ: request for read permission + * @IOMMU_PGFAULT_PERM_WRITE: request for write permission + * @IOMMU_PGFAULT_PERM_EXEC: (PCIE 10.4.1) request with a PASID that has the + * Execute Requested bit set in PASID TLP Prefix. + * @IOMMU_PGFAULT_PERM_PRIV: (PCIE 10.4.1) request with a PASID that has the + * Privileged Mode Requested bit set in PASID TLP + * Prefix. + */ +enum iommu_hwpt_pgfault_perm { + IOMMU_PGFAULT_PERM_READ = (1 << 0), + IOMMU_PGFAULT_PERM_WRITE = (1 << 1), + IOMMU_PGFAULT_PERM_EXEC = (1 << 2), + IOMMU_PGFAULT_PERM_PRIV = (1 << 3), +}; + +/** + * struct iommu_hwpt_pgfault - iommu page fault data + * @flags: Combination of enum iommu_hwpt_pgfault_flags + * @dev_id: ID of the device that originated the fault + * @pasid: Process Address Space ID + * @grpid: Page Request Group Index + * @perm: Combination of enum iommu_hwpt_pgfault_perm + * @addr: Fault address + * @length: a hint of how much data the requestor is expecting to fetch. For + * example, if the PRI initiator knows it is going to do a 10MB + * transfer, it could fill in 10MB and the OS could pre-fault in + * 10MB of IOVA. It defaults to 0 if there is no such hint. + * @cookie: kernel-managed cookie identifying a group of fault messages. The + * cookie number encoded in the last page fault of the group should + * be echoed back in the response message. + */ +struct iommu_hwpt_pgfault { + __u32 flags; + __u32 dev_id; + __u32 pasid; + __u32 grpid; + __u32 perm; + __u64 addr; + __u32 length; + __u32 cookie; +}; + +/** + * enum iommufd_page_response_code - Return status of fault handlers + * @IOMMUFD_PAGE_RESP_SUCCESS: Fault has been handled and the page tables + * populated, retry the access. This is the + * "Success" defined in PCI 10.4.2.1. + * @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the + * access. This is the "Invalid Request" in PCI + * 10.4.2.1. + * @IOMMUFD_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from + * this device if possible. This is the "Response + * Failure" in PCI 10.4.2.1. + */ +enum iommufd_page_response_code { + IOMMUFD_PAGE_RESP_SUCCESS = 0, + IOMMUFD_PAGE_RESP_INVALID, + IOMMUFD_PAGE_RESP_FAILURE, +}; + +/** + * struct iommu_hwpt_page_response - IOMMU page fault response + * @cookie: The kernel-managed cookie reported in the fault message. + * @code: One of the response codes in enum iommufd_page_response_code. 
+ */ +struct iommu_hwpt_page_response { + __u32 cookie; + __u32 code; +}; + +/** + * struct iommu_fault_alloc - ioctl(IOMMU_FAULT_QUEUE_ALLOC) + * @size: sizeof(struct iommu_fault_alloc) + * @flags: Must be 0 + * @out_fault_id: The ID of the new FAULT + * @out_fault_fd: The fd of the new FAULT + * + * Explicitly allocate a fault handling object. + */ +struct iommu_fault_alloc { + __u32 size; + __u32 flags; + __u32 out_fault_id; + __u32 out_fault_fd; +}; +#define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC) #endif diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 5f7d5a5ba89b..6343f4053bd4 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -279,6 +279,9 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) uint32_t parent_hwpt_id = 0; uint32_t parent_hwpt_id_not_work = 0; uint32_t test_hwpt_id = 0; + uint32_t iopf_hwpt_id; + uint32_t fault_id; + uint32_t fault_fd; if (self->device_id) { /* Negative tests */ @@ -326,6 +329,7 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) sizeof(data)); /* Allocate two nested hwpts sharing one common parent hwpt */ + test_ioctl_fault_alloc(&fault_id, &fault_fd); test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id, 0, &nested_hwpt_id[0], IOMMU_HWPT_DATA_SELFTEST, &data, @@ -334,6 +338,14 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) &nested_hwpt_id[1], IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); + test_err_hwpt_alloc_iopf(ENOENT, self->device_id, parent_hwpt_id, + UINT32_MAX, IOMMU_HWPT_FAULT_ID_VALID, + &iopf_hwpt_id, IOMMU_HWPT_DATA_SELFTEST, + &data, sizeof(data)); + test_cmd_hwpt_alloc_iopf(self->device_id, parent_hwpt_id, fault_id, + IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id, + IOMMU_HWPT_DATA_SELFTEST, &data, + sizeof(data)); test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[0], IOMMU_TEST_IOTLB_DEFAULT); test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[1], @@ -504,14 +516,24 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) _test_ioctl_destroy(self->fd, nested_hwpt_id[1])); test_ioctl_destroy(nested_hwpt_id[0]); + /* Switch from nested_hwpt_id[1] to iopf_hwpt_id */ + test_cmd_mock_domain_replace(self->stdev_id, iopf_hwpt_id); + EXPECT_ERRNO(EBUSY, + _test_ioctl_destroy(self->fd, iopf_hwpt_id)); + /* Trigger an IOPF on the device */ + test_cmd_trigger_iopf(self->device_id, fault_fd); + /* Detach from nested_hwpt_id[1] and destroy it */ test_cmd_mock_domain_replace(self->stdev_id, parent_hwpt_id); test_ioctl_destroy(nested_hwpt_id[1]); + test_ioctl_destroy(iopf_hwpt_id); /* Detach from the parent hw_pagetable and destroy it */ test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id); test_ioctl_destroy(parent_hwpt_id); test_ioctl_destroy(parent_hwpt_id_not_work); + close(fault_fd); + test_ioctl_destroy(fault_id); } else { test_err_hwpt_alloc(ENOENT, self->device_id, self->ioas_id, 0, &parent_hwpt_id); diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c index f590417cd67a..c5d5e69452b0 100644 --- a/tools/testing/selftests/iommu/iommufd_fail_nth.c +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -615,7 +615,7 @@ TEST_FAIL_NTH(basic_fail_nth, device) if (_test_cmd_get_hw_info(self->fd, idev_id, &info, sizeof(info), NULL)) return -1; - if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, &hwpt_id, + if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, 0, &hwpt_id, IOMMU_HWPT_DATA_NONE, 0, 0)) return -1; diff --git a/tools/testing/selftests/iommu/iommufd_utils.h 
b/tools/testing/selftests/iommu/iommufd_utils.h index c612fbf0195b..40f6f14ce136 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -155,7 +155,7 @@ static int _test_cmd_mock_domain_replace(int fd, __u32 stdev_id, __u32 pt_id, EXPECT_ERRNO(_errno, _test_cmd_mock_domain_replace(self->fd, stdev_id, \ pt_id, NULL)) -static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, +static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, __u32 ft_id, __u32 flags, __u32 *hwpt_id, __u32 data_type, void *data, size_t data_len) { @@ -167,6 +167,7 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, .data_type = data_type, .data_len = data_len, .data_uptr = (uint64_t)data, + .fault_id = ft_id, }; int ret; @@ -179,24 +180,36 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, } #define test_cmd_hwpt_alloc(device_id, pt_id, flags, hwpt_id) \ - ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \ + ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, 0, flags, \ hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, \ 0)) #define test_err_hwpt_alloc(_errno, device_id, pt_id, flags, hwpt_id) \ EXPECT_ERRNO(_errno, _test_cmd_hwpt_alloc( \ - self->fd, device_id, pt_id, flags, \ + self->fd, device_id, pt_id, 0, flags, \ hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, 0)) #define test_cmd_hwpt_alloc_nested(device_id, pt_id, flags, hwpt_id, \ data_type, data, data_len) \ - ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \ + ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, 0, flags, \ hwpt_id, data_type, data, data_len)) #define test_err_hwpt_alloc_nested(_errno, device_id, pt_id, flags, hwpt_id, \ data_type, data, data_len) \ EXPECT_ERRNO(_errno, \ - _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \ + _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, 0, flags, \ hwpt_id, data_type, data, data_len)) +#define test_cmd_hwpt_alloc_iopf(device_id, pt_id, fault_id, flags, hwpt_id, \ + data_type, data, data_len) \ + ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, fault_id, \ + flags, hwpt_id, data_type, data, \ + data_len)) +#define test_err_hwpt_alloc_iopf(_errno, device_id, pt_id, fault_id, flags, \ + hwpt_id, data_type, data, data_len) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, fault_id, \ + flags, hwpt_id, data_type, data, \ + data_len)) + #define test_cmd_hwpt_check_iotlb(hwpt_id, iotlb_id, expected) \ ({ \ struct iommu_test_cmd test_cmd = { \ @@ -686,3 +699,66 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data, #define test_cmd_get_hw_capabilities(device_id, caps, mask) \ ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, 0, &caps)) + +static int _test_ioctl_fault_alloc(int fd, __u32 *fault_id, __u32 *fault_fd) +{ + struct iommu_fault_alloc cmd = { + .size = sizeof(cmd), + }; + int ret; + + ret = ioctl(fd, IOMMU_FAULT_QUEUE_ALLOC, &cmd); + if (ret) + return ret; + *fault_id = cmd.out_fault_id; + *fault_fd = cmd.out_fault_fd; + return 0; +} + +#define test_ioctl_fault_alloc(fault_id, fault_fd) \ + ({ \ + ASSERT_EQ(0, _test_ioctl_fault_alloc(self->fd, fault_id, \ + fault_fd)); \ + ASSERT_NE(0, *(fault_id)); \ + ASSERT_NE(0, *(fault_fd)); \ + }) + +static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 fault_fd) +{ + struct iommu_test_cmd trigger_iopf_cmd = { + .size = sizeof(trigger_iopf_cmd), + .op = IOMMU_TEST_OP_TRIGGER_IOPF, + .trigger_iopf = { + .dev_id = 
device_id, + .pasid = 0x1, + .grpid = 0x2, + .perm = IOMMU_PGFAULT_PERM_READ | IOMMU_PGFAULT_PERM_WRITE, + .addr = 0xdeadbeaf, + }, + }; + struct iommu_hwpt_page_response response = { + .code = IOMMUFD_PAGE_RESP_SUCCESS, + }; + struct iommu_hwpt_pgfault fault = {}; + ssize_t bytes; + int ret; + + ret = ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_IOPF), &trigger_iopf_cmd); + if (ret) + return ret; + + bytes = read(fault_fd, &fault, sizeof(fault)); + if (bytes <= 0) + return -EIO; + + response.cookie = fault.cookie; + + bytes = write(fault_fd, &response, sizeof(response)); + if (bytes <= 0) + return -EIO; + + return 0; +} + +#define test_cmd_trigger_iopf(device_id, fault_fd) \ + ASSERT_EQ(0, _test_cmd_trigger_iopf(self->fd, device_id, fault_fd)) |
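To round out the selftest helpers above, here is a sketch of a batched fault-service loop built on the poll support in iommufd_fault_fops_poll(): the fault fd reports EPOLLIN while fault groups are queued, read() returns only whole groups (a group that does not fit in the remaining buffer is left queued), and one response per group is written back keyed by the group's cookie. The function name and batch size are illustrative, not part of the uAPI.

	#include <poll.h>
	#include <unistd.h>
	#include <linux/iommufd.h>

	/* Wait for queued faults, drain them, respond once per group. */
	static void fault_service_loop(int fault_fd)
	{
		struct iommu_hwpt_pgfault msgs[16];	/* illustrative batch */
		struct pollfd pfd = { .fd = fault_fd, .events = POLLIN };

		for (;;) {
			ssize_t n;
			int i;

			if (poll(&pfd, 1, -1) <= 0)
				break;

			/* Only whole fault groups are ever returned */
			n = read(fault_fd, msgs, sizeof(msgs));
			if (n <= 0)
				break;

			for (i = 0; i < n / (ssize_t)sizeof(msgs[0]); i++) {
				struct iommu_hwpt_page_response resp = {
					.cookie = msgs[i].cookie,
					.code = IOMMUFD_PAGE_RESP_SUCCESS,
				};

				/* One response per group: its last message */
				if (!(msgs[i].flags &
				      IOMMU_PGFAULT_FLAGS_LAST_PAGE))
					continue;
				if (write(fault_fd, &resp, sizeof(resp)) !=
				    sizeof(resp))
					return;
			}
		}
	}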