diff options
author | Jason Gunthorpe <jgg@nvidia.com> | 2021-10-15 08:40:50 -0300 |
---|---|---|
committer | Alex Williamson <alex.williamson@redhat.com> | 2021-10-15 13:58:19 -0600 |
commit | 63b150fde7a2549e8bf7cb0fca7e9cfb9cad50d7 (patch) | |
tree | d182118527f0f187712de12a3e9026dfe03f2419 /drivers/vfio | |
parent | 48f06ca420c350635423685b40cf27374380bac4 (diff) | |
download | lwn-63b150fde7a2549e8bf7cb0fca7e9cfb9cad50d7.tar.gz lwn-63b150fde7a2549e8bf7cb0fca7e9cfb9cad50d7.zip |
vfio: Delete vfio_get/put_group from vfio_iommu_group_notifier()
iommu_group_register_notifier()/iommu_group_unregister_notifier() are
built using a blocking_notifier_chain which integrates a rwsem. The
notifier function cannot be running outside its registration.
When considering how the notifier function interacts with create/destroy
of the group there are two fringe cases, the notifier starts before
list_add(&vfio.group_list) and the notifier runs after the kref
becomes 0.
Prior to vfio_create_group() unlocking and returning we have
container_users == 0
device_list == empty
And this cannot change until the mutex is unlocked.
After the kref goes to zero we must also have
container_users == 0
device_list == empty
Both are required because they are balanced operations and a 0 kref means
some caller became unbalanced. Add the missing assertion that
container_users must be zero as well.
These two facts are important because when checking each operation we see:
- IOMMU_GROUP_NOTIFY_ADD_DEVICE
Empty device_list avoids the WARN_ON in vfio_group_nb_add_dev()
0 container_users ends the call
- IOMMU_GROUP_NOTIFY_BOUND_DRIVER
0 container_users ends the call
Finally, we have IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER, which only deletes
items from the unbound list. During creation this list is empty, during
kref == 0 nothing can read this list, and it will be freed soon.
Since the vfio_group_release() doesn't hold the appropriate lock to
manipulate the unbound_list and could race with the notifier, move the
cleanup to directly before the kfree.
This allows deleting all of the deferred group put code.
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Liu Yi L <yi.l.liu@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/1-v3-2fdfe4ca2cc6+18c-vfio_group_cdev_jgg@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Diffstat (limited to 'drivers/vfio')
-rw-r--r-- | drivers/vfio/vfio.c | 100 |
1 files changed, 15 insertions, 85 deletions
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 08b27b64f0f9..4ce7e9fe43af 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -324,12 +324,16 @@ static void vfio_container_put(struct vfio_container *container) static void vfio_group_unlock_and_free(struct vfio_group *group) { + struct vfio_unbound_dev *unbound, *tmp; + mutex_unlock(&vfio.group_lock); - /* - * Unregister outside of lock. A spurious callback is harmless now - * that the group is no longer in vfio.group_list. - */ iommu_group_unregister_notifier(group->iommu_group, &group->nb); + + list_for_each_entry_safe(unbound, tmp, + &group->unbound_list, unbound_next) { + list_del(&unbound->unbound_next); + kfree(unbound); + } kfree(group); } @@ -360,14 +364,6 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group, BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); group->nb.notifier_call = vfio_iommu_group_notifier; - - /* - * blocking notifiers acquire a rwsem around registering and hold - * it around callback. Therefore, need to register outside of - * vfio.group_lock to avoid A-B/B-A contention. Our callback won't - * do anything unless it can find the group in vfio.group_list, so - * no harm in registering early. - */ ret = iommu_group_register_notifier(iommu_group, &group->nb); if (ret) { kfree(group); @@ -415,18 +411,18 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group, static void vfio_group_release(struct kref *kref) { struct vfio_group *group = container_of(kref, struct vfio_group, kref); - struct vfio_unbound_dev *unbound, *tmp; struct iommu_group *iommu_group = group->iommu_group; + /* + * These data structures all have paired operations that can only be + * undone when the caller holds a live reference on the group. Since all + * pairs must be undone these WARN_ON's indicate some caller did not + * properly hold the group reference. + */ WARN_ON(!list_empty(&group->device_list)); + WARN_ON(atomic_read(&group->container_users)); WARN_ON(group->notifier.head); - list_for_each_entry_safe(unbound, tmp, - &group->unbound_list, unbound_next) { - list_del(&unbound->unbound_next); - kfree(unbound); - } - device_destroy(vfio.class, MKDEV(MAJOR(vfio.group_devt), group->minor)); list_del(&group->vfio_next); vfio_free_group_minor(group->minor); @@ -439,61 +435,12 @@ static void vfio_group_put(struct vfio_group *group) kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock); } -struct vfio_group_put_work { - struct work_struct work; - struct vfio_group *group; -}; - -static void vfio_group_put_bg(struct work_struct *work) -{ - struct vfio_group_put_work *do_work; - - do_work = container_of(work, struct vfio_group_put_work, work); - - vfio_group_put(do_work->group); - kfree(do_work); -} - -static void vfio_group_schedule_put(struct vfio_group *group) -{ - struct vfio_group_put_work *do_work; - - do_work = kmalloc(sizeof(*do_work), GFP_KERNEL); - if (WARN_ON(!do_work)) - return; - - INIT_WORK(&do_work->work, vfio_group_put_bg); - do_work->group = group; - schedule_work(&do_work->work); -} - /* Assume group_lock or group reference is held */ static void vfio_group_get(struct vfio_group *group) { kref_get(&group->kref); } -/* - * Not really a try as we will sleep for mutex, but we need to make - * sure the group pointer is valid under lock and get a reference. - */ -static struct vfio_group *vfio_group_try_get(struct vfio_group *group) -{ - struct vfio_group *target = group; - - mutex_lock(&vfio.group_lock); - list_for_each_entry(group, &vfio.group_list, vfio_next) { - if (group == target) { - vfio_group_get(group); - mutex_unlock(&vfio.group_lock); - return group; - } - } - mutex_unlock(&vfio.group_lock); - - return NULL; -} - static struct vfio_group *vfio_group_get_from_iommu(struct iommu_group *iommu_group) { @@ -691,14 +638,6 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb, struct device *dev = data; struct vfio_unbound_dev *unbound; - /* - * Need to go through a group_lock lookup to get a reference or we - * risk racing a group being removed. Ignore spurious notifies. - */ - group = vfio_group_try_get(group); - if (!group) - return NOTIFY_OK; - switch (action) { case IOMMU_GROUP_NOTIFY_ADD_DEVICE: vfio_group_nb_add_dev(group, dev); @@ -749,15 +688,6 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb, mutex_unlock(&group->unbound_lock); break; } - - /* - * If we're the last reference to the group, the group will be - * released, which includes unregistering the iommu group notifier. - * We hold a read-lock on that notifier list, unregistering needs - * a write-lock... deadlock. Release our reference asynchronously - * to avoid that situation. - */ - vfio_group_schedule_put(group); return NOTIFY_OK; } |