author     Dragos Tatulea <dtatulea@nvidia.com>              2024-08-16 12:01:55 +0300
committer  Michael S. Tsirkin <mst@redhat.com>               2024-09-25 07:07:42 -0400
commit     dcf3eac01f063df0a60ea779399331d2ac535784 (patch)
tree       1940d51e8daeced91b44db924ec14fb22347b96f /drivers
parent     61674c154bb7f19fad612242022276e8bd9e10d2 (diff)
vdpa/mlx5: Parallelize device suspend
Currently, device suspend works on vqs serially. Building on previous
changes that converted vq operations to the async API, this patch
parallelizes device suspend:
1) Suspend all active vqs in parallel.
2) Query the suspended vqs in parallel.
For 1 vDPA device x 32 VQs (16 VQPs) attached to a large VM (256 GB RAM,
32 CPUs x 2 threads per core), the device suspend time is reduced from
~37 ms to ~13 ms.
A later patch will remove the link unregister operation, which will make
it even faster.
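
Why does batching help? Each vq suspend is a firmware command that pays a
device round trip: issued one at a time, 32 vqs cost ~32 round trips; issued
together and then awaited together, they cost roughly one. Below is a minimal,
self-contained userspace sketch of that issue-all-then-wait-all pattern. It is
illustrative only: every name in it is invented, and the real driver uses the
mlx5 async command API from earlier patches in this series, not threads.

	/* Toy model: N "firmware commands" with a fixed latency each. */
	#include <pthread.h>
	#include <stdio.h>
	#include <unistd.h>

	#define NUM_VQS        32
	#define CMD_LATENCY_US 1000	/* pretend each command takes ~1 ms */

	/* Stand-in for one SUSPEND_VQ firmware command. */
	static void *fake_suspend_cmd(void *arg)
	{
		usleep(CMD_LATENCY_US);
		return NULL;
	}

	int main(void)
	{
		pthread_t cmds[NUM_VQS];
		int i;

		/* Serial model: ~NUM_VQS * CMD_LATENCY_US in total. */
		for (i = 0; i < NUM_VQS; i++)
			fake_suspend_cmd(NULL);

		/*
		 * Batched model: issue every command, then wait for all
		 * completions; total time is roughly one command latency.
		 */
		for (i = 0; i < NUM_VQS; i++)
			pthread_create(&cmds[i], NULL, fake_suspend_cmd, NULL);
		for (i = 0; i < NUM_VQS; i++)
			pthread_join(cmds[i], NULL);

		printf("suspended %d vqs twice; time each loop to see the gap\n",
		       NUM_VQS);
		return 0;
	}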
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Acked-by: Eugenio Pérez <eperezma@redhat.com>
Message-Id: <20240816090159.1967650-7-dtatulea@nvidia.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Lei Yang <leiyang@redhat.com>
Diffstat (limited to 'drivers')
-rw-r--r--   drivers/vdpa/mlx5/net/mlx5_vnet.c   56
1 file changed, 29 insertions(+), 27 deletions(-)
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index de05b941e9d1..17f74b1f0644 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -1630,49 +1630,51 @@ done:
 	return err;
 }
 
-static int suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
+static int suspend_vqs(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs)
 {
-	struct mlx5_virtq_attr attr;
+	struct mlx5_vdpa_virtqueue *mvq;
+	struct mlx5_virtq_attr *attrs;
+	int vq_idx, i;
 	int err;
 
+	if (start_vq >= ndev->cur_num_vqs)
+		return -EINVAL;
+
+	mvq = &ndev->vqs[start_vq];
 	if (!mvq->initialized)
 		return 0;
 
 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
 		return 0;
 
-	err = modify_virtqueues(ndev, mvq->index, 1, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND);
-	if (err) {
-		mlx5_vdpa_err(&ndev->mvdev, "modify to suspend failed, err: %d\n", err);
-		return err;
-	}
-
-	err = query_virtqueues(ndev, mvq->index, 1, &attr);
-	if (err) {
-		mlx5_vdpa_err(&ndev->mvdev, "failed to query virtqueue, err: %d\n", err);
+	err = modify_virtqueues(ndev, start_vq, num_vqs, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND);
+	if (err)
 		return err;
-	}
-
-	mvq->avail_idx = attr.available_index;
-	mvq->used_idx = attr.used_index;
-
-	return 0;
-}
 
-static int suspend_vqs(struct mlx5_vdpa_net *ndev)
-{
-	int err = 0;
-	int i;
+	attrs = kcalloc(num_vqs, sizeof(struct mlx5_virtq_attr), GFP_KERNEL);
+	if (!attrs)
+		return -ENOMEM;
 
-	for (i = 0; i < ndev->cur_num_vqs; i++) {
-		int local_err = suspend_vq(ndev, &ndev->vqs[i]);
+	err = query_virtqueues(ndev, start_vq, num_vqs, attrs);
+	if (err)
+		goto done;
 
-		err = local_err ? local_err : err;
+	for (i = 0, vq_idx = start_vq; i < num_vqs; i++, vq_idx++) {
+		mvq = &ndev->vqs[vq_idx];
+		mvq->avail_idx = attrs[i].available_index;
+		mvq->used_idx = attrs[i].used_index;
 	}
 
+done:
+	kfree(attrs);
 	return err;
 }
 
+static int suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
+{
+	return suspend_vqs(ndev, mvq->index, 1);
+}
+
 static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
 {
 	int err;
@@ -3053,7 +3055,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
 	bool teardown = !is_resumable(ndev);
 	int err;
 
-	suspend_vqs(ndev);
+	suspend_vqs(ndev, 0, ndev->cur_num_vqs);
 	if (teardown) {
 		err = save_channels_info(ndev);
 		if (err)
@@ -3606,7 +3608,7 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
 
 	down_write(&ndev->reslock);
 	unregister_link_notifier(ndev);
-	err = suspend_vqs(ndev);
+	err = suspend_vqs(ndev, 0, ndev->cur_num_vqs);
 	mlx5_vdpa_cvq_suspend(mvdev);
 	mvdev->suspended = true;
 	up_write(&ndev->reslock);
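
A note on the error handling in the reworked suspend_vqs() above: the vq range
is validated and the modify command issued before any allocation, so those
early failures return directly, while a query_virtqueues() failure after the
kcalloc() exits through the done: label so attrs is freed on every path. For
readers unfamiliar with this kernel idiom, here is a stand-alone userspace
sketch of the same allocate/act/single-cleanup shape (all names hypothetical):

	#include <errno.h>
	#include <stdio.h>
	#include <stdlib.h>

	/* Stand-in for query_virtqueues(); always succeeds here. */
	static int fake_query(int num, int *out)
	{
		for (int i = 0; i < num; i++)
			out[i] = i;
		return 0;
	}

	static int do_batch(int num)
	{
		int *results;
		int err;

		if (num <= 0)		/* validate before allocating */
			return -EINVAL;

		results = calloc(num, sizeof(*results));
		if (!results)
			return -ENOMEM;

		err = fake_query(num, results);
		if (err)
			goto done;	/* failure still reaches the cleanup */

		for (int i = 0; i < num; i++)
			printf("vq %d: %d\n", i, results[i]);

	done:
		free(results);		/* single cleanup point for all paths */
		return err;
	}

	int main(void)
	{
		return do_batch(4) ? 1 : 0;
	}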