summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorJason Gunthorpe <jgg@nvidia.com>2021-10-15 08:40:50 -0300
committerAlex Williamson <alex.williamson@redhat.com>2021-10-15 13:58:19 -0600
commit63b150fde7a2549e8bf7cb0fca7e9cfb9cad50d7 (patch)
treed182118527f0f187712de12a3e9026dfe03f2419 /drivers
parent48f06ca420c350635423685b40cf27374380bac4 (diff)
downloadlwn-63b150fde7a2549e8bf7cb0fca7e9cfb9cad50d7.tar.gz
lwn-63b150fde7a2549e8bf7cb0fca7e9cfb9cad50d7.zip
vfio: Delete vfio_get/put_group from vfio_iommu_group_notifier()
iommu_group_register_notifier()/iommu_group_unregister_notifier() are built using a blocking_notifier_chain which integrates a rwsem. The notifier function cannot be running outside its registration. When considering how the notifier function interacts with create/destroy of the group there are two fringe cases, the notifier starts before list_add(&vfio.group_list) and the notifier runs after the kref becomes 0. Prior to vfio_create_group() unlocking and returning we have container_users == 0 device_list == empty And this cannot change until the mutex is unlocked. After the kref goes to zero we must also have container_users == 0 device_list == empty Both are required because they are balanced operations and a 0 kref means some caller became unbalanced. Add the missing assertion that container_users must be zero as well. These two facts are important because when checking each operation we see: - IOMMU_GROUP_NOTIFY_ADD_DEVICE Empty device_list avoids the WARN_ON in vfio_group_nb_add_dev() 0 container_users ends the call - IOMMU_GROUP_NOTIFY_BOUND_DRIVER 0 container_users ends the call Finally, we have IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER, which only deletes items from the unbound list. During creation this list is empty, during kref == 0 nothing can read this list, and it will be freed soon. Since the vfio_group_release() doesn't hold the appropriate lock to manipulate the unbound_list and could race with the notifier, move the cleanup to directly before the kfree. This allows deleting all of the deferred group put code. Reviewed-by: Kevin Tian <kevin.tian@intel.com> Reviewed-by: Liu Yi L <yi.l.liu@intel.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> Link: https://lore.kernel.org/r/1-v3-2fdfe4ca2cc6+18c-vfio_group_cdev_jgg@nvidia.com Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/vfio/vfio.c100
1 files changed, 15 insertions, 85 deletions
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 08b27b64f0f9..4ce7e9fe43af 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -324,12 +324,16 @@ static void vfio_container_put(struct vfio_container *container)
static void vfio_group_unlock_and_free(struct vfio_group *group)
{
+ struct vfio_unbound_dev *unbound, *tmp;
+
mutex_unlock(&vfio.group_lock);
- /*
- * Unregister outside of lock. A spurious callback is harmless now
- * that the group is no longer in vfio.group_list.
- */
iommu_group_unregister_notifier(group->iommu_group, &group->nb);
+
+ list_for_each_entry_safe(unbound, tmp,
+ &group->unbound_list, unbound_next) {
+ list_del(&unbound->unbound_next);
+ kfree(unbound);
+ }
kfree(group);
}
@@ -360,14 +364,6 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
group->nb.notifier_call = vfio_iommu_group_notifier;
-
- /*
- * blocking notifiers acquire a rwsem around registering and hold
- * it around callback. Therefore, need to register outside of
- * vfio.group_lock to avoid A-B/B-A contention. Our callback won't
- * do anything unless it can find the group in vfio.group_list, so
- * no harm in registering early.
- */
ret = iommu_group_register_notifier(iommu_group, &group->nb);
if (ret) {
kfree(group);
@@ -415,18 +411,18 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
static void vfio_group_release(struct kref *kref)
{
struct vfio_group *group = container_of(kref, struct vfio_group, kref);
- struct vfio_unbound_dev *unbound, *tmp;
struct iommu_group *iommu_group = group->iommu_group;
+ /*
+ * These data structures all have paired operations that can only be
+ * undone when the caller holds a live reference on the group. Since all
+ * pairs must be undone these WARN_ON's indicate some caller did not
+ * properly hold the group reference.
+ */
WARN_ON(!list_empty(&group->device_list));
+ WARN_ON(atomic_read(&group->container_users));
WARN_ON(group->notifier.head);
- list_for_each_entry_safe(unbound, tmp,
- &group->unbound_list, unbound_next) {
- list_del(&unbound->unbound_next);
- kfree(unbound);
- }
-
device_destroy(vfio.class, MKDEV(MAJOR(vfio.group_devt), group->minor));
list_del(&group->vfio_next);
vfio_free_group_minor(group->minor);
@@ -439,61 +435,12 @@ static void vfio_group_put(struct vfio_group *group)
kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock);
}
-struct vfio_group_put_work {
- struct work_struct work;
- struct vfio_group *group;
-};
-
-static void vfio_group_put_bg(struct work_struct *work)
-{
- struct vfio_group_put_work *do_work;
-
- do_work = container_of(work, struct vfio_group_put_work, work);
-
- vfio_group_put(do_work->group);
- kfree(do_work);
-}
-
-static void vfio_group_schedule_put(struct vfio_group *group)
-{
- struct vfio_group_put_work *do_work;
-
- do_work = kmalloc(sizeof(*do_work), GFP_KERNEL);
- if (WARN_ON(!do_work))
- return;
-
- INIT_WORK(&do_work->work, vfio_group_put_bg);
- do_work->group = group;
- schedule_work(&do_work->work);
-}
-
/* Assume group_lock or group reference is held */
static void vfio_group_get(struct vfio_group *group)
{
kref_get(&group->kref);
}
-/*
- * Not really a try as we will sleep for mutex, but we need to make
- * sure the group pointer is valid under lock and get a reference.
- */
-static struct vfio_group *vfio_group_try_get(struct vfio_group *group)
-{
- struct vfio_group *target = group;
-
- mutex_lock(&vfio.group_lock);
- list_for_each_entry(group, &vfio.group_list, vfio_next) {
- if (group == target) {
- vfio_group_get(group);
- mutex_unlock(&vfio.group_lock);
- return group;
- }
- }
- mutex_unlock(&vfio.group_lock);
-
- return NULL;
-}
-
static
struct vfio_group *vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
@@ -691,14 +638,6 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb,
struct device *dev = data;
struct vfio_unbound_dev *unbound;
- /*
- * Need to go through a group_lock lookup to get a reference or we
- * risk racing a group being removed. Ignore spurious notifies.
- */
- group = vfio_group_try_get(group);
- if (!group)
- return NOTIFY_OK;
-
switch (action) {
case IOMMU_GROUP_NOTIFY_ADD_DEVICE:
vfio_group_nb_add_dev(group, dev);
@@ -749,15 +688,6 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb,
mutex_unlock(&group->unbound_lock);
break;
}
-
- /*
- * If we're the last reference to the group, the group will be
- * released, which includes unregistering the iommu group notifier.
- * We hold a read-lock on that notifier list, unregistering needs
- * a write-lock... deadlock. Release our reference asynchronously
- * to avoid that situation.
- */
- vfio_group_schedule_put(group);
return NOTIFY_OK;
}