summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-01-18 16:44:03 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2024-01-18 16:44:03 -0800
commit0b7359ccddaaa844044c62000734f0cb92ab6310 (patch)
treef37be5ed54ec934208fbcb0d2af81c81d364d6c6 /drivers
parentda3c45c721e2807b2effdea8f41ece96bbf47b15 (diff)
parentf16d65124380ac6de8055c4a8e5373a1043bb09b (diff)
downloadlwn-0b7359ccddaaa844044c62000734f0cb92ab6310.tar.gz
lwn-0b7359ccddaaa844044c62000734f0cb92ab6310.zip
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio updates from Michael Tsirkin: - vdpa/mlx5: support for resumable vqs - virtio_scsi: mq_poll support - 3virtio_pmem: support SHMEM_REGION - virtio_balloon: stay awake while adjusting balloon - virtio: support for no-reset virtio PCI PM - Fixes, cleanups * tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: vdpa/mlx5: Add mkey leak detection vdpa/mlx5: Introduce reference counting to mrs vdpa/mlx5: Use vq suspend/resume during .set_map vdpa/mlx5: Mark vq state for modification in hw vq vdpa/mlx5: Mark vq addrs for modification in hw vq vdpa/mlx5: Introduce per vq and device resume vdpa/mlx5: Allow modifying multiple vq fields in one modify command vdpa/mlx5: Expose resumable vq capability vdpa: Block vq property changes in DRIVER_OK vdpa: Track device suspended state scsi: virtio_scsi: Add mq_poll support virtio_pmem: support feature SHMEM_REGION virtio_balloon: stay awake while adjusting balloon vdpa: Remove usage of the deprecated ida_simple_xx() API virtio: Add support for no-reset virtio PCI PM virtio_net: fix missing dma unmap for resize vhost-vdpa: account iommu allocations vdpa: Fix an error handling path in eni_vdpa_probe()
Diffstat (limited to 'drivers')
-rw-r--r--drivers/nvdimm/virtio_pmem.c36
-rw-r--r--drivers/scsi/virtio_scsi.c78
-rw-r--r--drivers/vdpa/alibaba/eni_vdpa.c6
-rw-r--r--drivers/vdpa/mlx5/core/mlx5_vdpa.h10
-rw-r--r--drivers/vdpa/mlx5/core/mr.c69
-rw-r--r--drivers/vdpa/mlx5/net/mlx5_vnet.c209
-rw-r--r--drivers/vdpa/vdpa.c4
-rw-r--r--drivers/vhost/vdpa.c26
-rw-r--r--drivers/virtio/virtio_balloon.c57
-rw-r--r--drivers/virtio/virtio_pci_common.c34
10 files changed, 465 insertions, 64 deletions
diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
index a92eb172f0e7..4ceced5cefcf 100644
--- a/drivers/nvdimm/virtio_pmem.c
+++ b/drivers/nvdimm/virtio_pmem.c
@@ -29,12 +29,27 @@ static int init_vq(struct virtio_pmem *vpmem)
return 0;
};
+static int virtio_pmem_validate(struct virtio_device *vdev)
+{
+ struct virtio_shm_region shm_reg;
+
+ if (virtio_has_feature(vdev, VIRTIO_PMEM_F_SHMEM_REGION) &&
+ !virtio_get_shm_region(vdev, &shm_reg, (u8)VIRTIO_PMEM_SHMEM_REGION_ID)
+ ) {
+ dev_notice(&vdev->dev, "failed to get shared memory region %d\n",
+ VIRTIO_PMEM_SHMEM_REGION_ID);
+ __virtio_clear_bit(vdev, VIRTIO_PMEM_F_SHMEM_REGION);
+ }
+ return 0;
+}
+
static int virtio_pmem_probe(struct virtio_device *vdev)
{
struct nd_region_desc ndr_desc = {};
struct nd_region *nd_region;
struct virtio_pmem *vpmem;
struct resource res;
+ struct virtio_shm_region shm_reg;
int err = 0;
if (!vdev->config->get) {
@@ -57,10 +72,16 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
goto out_err;
}
- virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
- start, &vpmem->start);
- virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
- size, &vpmem->size);
+ if (virtio_has_feature(vdev, VIRTIO_PMEM_F_SHMEM_REGION)) {
+ virtio_get_shm_region(vdev, &shm_reg, (u8)VIRTIO_PMEM_SHMEM_REGION_ID);
+ vpmem->start = shm_reg.addr;
+ vpmem->size = shm_reg.len;
+ } else {
+ virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
+ start, &vpmem->start);
+ virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
+ size, &vpmem->size);
+ }
res.start = vpmem->start;
res.end = vpmem->start + vpmem->size - 1;
@@ -122,10 +143,17 @@ static void virtio_pmem_remove(struct virtio_device *vdev)
virtio_reset_device(vdev);
}
+static unsigned int features[] = {
+ VIRTIO_PMEM_F_SHMEM_REGION,
+};
+
static struct virtio_driver virtio_pmem_driver = {
+ .feature_table = features,
+ .feature_table_size = ARRAY_SIZE(features),
.driver.name = KBUILD_MODNAME,
.driver.owner = THIS_MODULE,
.id_table = id_table,
+ .validate = virtio_pmem_validate,
.probe = virtio_pmem_probe,
.remove = virtio_pmem_remove,
};
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 9d1bdcdc1331..4cf20be668a6 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -37,6 +37,11 @@
#define VIRTIO_SCSI_EVENT_LEN 8
#define VIRTIO_SCSI_VQ_BASE 2
+static unsigned int virtscsi_poll_queues;
+module_param(virtscsi_poll_queues, uint, 0644);
+MODULE_PARM_DESC(virtscsi_poll_queues,
+ "The number of dedicated virtqueues for polling I/O");
+
/* Command queue element */
struct virtio_scsi_cmd {
struct scsi_cmnd *sc;
@@ -76,6 +81,7 @@ struct virtio_scsi {
struct virtio_scsi_event_node event_list[VIRTIO_SCSI_EVENT_LEN];
u32 num_queues;
+ int io_queues[HCTX_MAX_TYPES];
struct hlist_node node;
@@ -722,9 +728,49 @@ static int virtscsi_abort(struct scsi_cmnd *sc)
static void virtscsi_map_queues(struct Scsi_Host *shost)
{
struct virtio_scsi *vscsi = shost_priv(shost);
- struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT];
+ int i, qoff;
+
+ for (i = 0, qoff = 0; i < shost->nr_maps; i++) {
+ struct blk_mq_queue_map *map = &shost->tag_set.map[i];
+
+ map->nr_queues = vscsi->io_queues[i];
+ map->queue_offset = qoff;
+ qoff += map->nr_queues;
+
+ if (map->nr_queues == 0)
+ continue;
+
+ /*
+ * Regular queues have interrupts and hence CPU affinity is
+ * defined by the core virtio code, but polling queues have
+ * no interrupts so we let the block layer assign CPU affinity.
+ */
+ if (i == HCTX_TYPE_POLL)
+ blk_mq_map_queues(map);
+ else
+ blk_mq_virtio_map_queues(map, vscsi->vdev, 2);
+ }
+}
+
+static int virtscsi_mq_poll(struct Scsi_Host *shost, unsigned int queue_num)
+{
+ struct virtio_scsi *vscsi = shost_priv(shost);
+ struct virtio_scsi_vq *virtscsi_vq = &vscsi->req_vqs[queue_num];
+ unsigned long flags;
+ unsigned int len;
+ int found = 0;
+ void *buf;
+
+ spin_lock_irqsave(&virtscsi_vq->vq_lock, flags);
+
+ while ((buf = virtqueue_get_buf(virtscsi_vq->vq, &len)) != NULL) {
+ virtscsi_complete_cmd(vscsi, buf);
+ found++;
+ }
+
+ spin_unlock_irqrestore(&virtscsi_vq->vq_lock, flags);
- blk_mq_virtio_map_queues(qmap, vscsi->vdev, 2);
+ return found;
}
static void virtscsi_commit_rqs(struct Scsi_Host *shost, u16 hwq)
@@ -751,6 +797,7 @@ static const struct scsi_host_template virtscsi_host_template = {
.this_id = -1,
.cmd_size = sizeof(struct virtio_scsi_cmd),
.queuecommand = virtscsi_queuecommand,
+ .mq_poll = virtscsi_mq_poll,
.commit_rqs = virtscsi_commit_rqs,
.change_queue_depth = virtscsi_change_queue_depth,
.eh_abort_handler = virtscsi_abort,
@@ -795,13 +842,14 @@ static int virtscsi_init(struct virtio_device *vdev,
{
int err;
u32 i;
- u32 num_vqs;
+ u32 num_vqs, num_poll_vqs, num_req_vqs;
vq_callback_t **callbacks;
const char **names;
struct virtqueue **vqs;
struct irq_affinity desc = { .pre_vectors = 2 };
- num_vqs = vscsi->num_queues + VIRTIO_SCSI_VQ_BASE;
+ num_req_vqs = vscsi->num_queues;
+ num_vqs = num_req_vqs + VIRTIO_SCSI_VQ_BASE;
vqs = kmalloc_array(num_vqs, sizeof(struct virtqueue *), GFP_KERNEL);
callbacks = kmalloc_array(num_vqs, sizeof(vq_callback_t *),
GFP_KERNEL);
@@ -812,15 +860,31 @@ static int virtscsi_init(struct virtio_device *vdev,
goto out;
}
+ num_poll_vqs = min_t(unsigned int, virtscsi_poll_queues,
+ num_req_vqs - 1);
+ vscsi->io_queues[HCTX_TYPE_DEFAULT] = num_req_vqs - num_poll_vqs;
+ vscsi->io_queues[HCTX_TYPE_READ] = 0;
+ vscsi->io_queues[HCTX_TYPE_POLL] = num_poll_vqs;
+
+ dev_info(&vdev->dev, "%d/%d/%d default/read/poll queues\n",
+ vscsi->io_queues[HCTX_TYPE_DEFAULT],
+ vscsi->io_queues[HCTX_TYPE_READ],
+ vscsi->io_queues[HCTX_TYPE_POLL]);
+
callbacks[0] = virtscsi_ctrl_done;
callbacks[1] = virtscsi_event_done;
names[0] = "control";
names[1] = "event";
- for (i = VIRTIO_SCSI_VQ_BASE; i < num_vqs; i++) {
+ for (i = VIRTIO_SCSI_VQ_BASE; i < num_vqs - num_poll_vqs; i++) {
callbacks[i] = virtscsi_req_done;
names[i] = "request";
}
+ for (; i < num_vqs; i++) {
+ callbacks[i] = NULL;
+ names[i] = "request_poll";
+ }
+
/* Discover virtqueues and write information to configuration. */
err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc);
if (err)
@@ -874,6 +938,7 @@ static int virtscsi_probe(struct virtio_device *vdev)
sg_elems = virtscsi_config_get(vdev, seg_max) ?: 1;
shost->sg_tablesize = sg_elems;
+ shost->nr_maps = 1;
vscsi = shost_priv(shost);
vscsi->vdev = vdev;
vscsi->num_queues = num_queues;
@@ -883,6 +948,9 @@ static int virtscsi_probe(struct virtio_device *vdev)
if (err)
goto virtscsi_init_failed;
+ if (vscsi->io_queues[HCTX_TYPE_POLL])
+ shost->nr_maps = HCTX_TYPE_POLL + 1;
+
shost->can_queue = virtqueue_get_vring_size(vscsi->req_vqs[0].vq);
cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1;
diff --git a/drivers/vdpa/alibaba/eni_vdpa.c b/drivers/vdpa/alibaba/eni_vdpa.c
index 5a09a09cca70..cce3d1837104 100644
--- a/drivers/vdpa/alibaba/eni_vdpa.c
+++ b/drivers/vdpa/alibaba/eni_vdpa.c
@@ -497,7 +497,7 @@ static int eni_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (!eni_vdpa->vring) {
ret = -ENOMEM;
ENI_ERR(pdev, "failed to allocate virtqueues\n");
- goto err;
+ goto err_remove_vp_legacy;
}
for (i = 0; i < eni_vdpa->queues; i++) {
@@ -509,11 +509,13 @@ static int eni_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id)
ret = vdpa_register_device(&eni_vdpa->vdpa, eni_vdpa->queues);
if (ret) {
ENI_ERR(pdev, "failed to register to vdpa bus\n");
- goto err;
+ goto err_remove_vp_legacy;
}
return 0;
+err_remove_vp_legacy:
+ vp_legacy_remove(&eni_vdpa->ldev);
err:
put_device(&eni_vdpa->vdpa.dev);
return ret;
diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
index 84547d998bcf..50aac8fe57ef 100644
--- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
@@ -35,6 +35,9 @@ struct mlx5_vdpa_mr {
struct vhost_iotlb *iotlb;
bool user_mr;
+
+ refcount_t refcount;
+ struct list_head mr_list;
};
struct mlx5_vdpa_resources {
@@ -93,6 +96,7 @@ struct mlx5_vdpa_dev {
u32 generation;
struct mlx5_vdpa_mr *mr[MLX5_VDPA_NUM_AS];
+ struct list_head mr_list_head;
/* serialize mr access */
struct mutex mr_mtx;
struct mlx5_control_vq cvq;
@@ -118,8 +122,10 @@ int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, u32 mkey);
struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
struct vhost_iotlb *iotlb);
void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev);
-void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev,
- struct mlx5_vdpa_mr *mr);
+void mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev,
+ struct mlx5_vdpa_mr *mr);
+void mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev,
+ struct mlx5_vdpa_mr *mr);
void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev,
struct mlx5_vdpa_mr *mr,
unsigned int asid);
diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
index 2197c46e563a..4758914ccf86 100644
--- a/drivers/vdpa/mlx5/core/mr.c
+++ b/drivers/vdpa/mlx5/core/mr.c
@@ -498,32 +498,54 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr
static void _mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{
+ if (WARN_ON(!mr))
+ return;
+
if (mr->user_mr)
destroy_user_mr(mvdev, mr);
else
destroy_dma_mr(mvdev, mr);
vhost_iotlb_free(mr->iotlb);
+
+ list_del(&mr->mr_list);
+
+ kfree(mr);
}
-void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev,
- struct mlx5_vdpa_mr *mr)
+static void _mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev,
+ struct mlx5_vdpa_mr *mr)
{
if (!mr)
return;
+ if (refcount_dec_and_test(&mr->refcount))
+ _mlx5_vdpa_destroy_mr(mvdev, mr);
+}
+
+void mlx5_vdpa_put_mr(struct mlx5_vdpa_dev *mvdev,
+ struct mlx5_vdpa_mr *mr)
+{
mutex_lock(&mvdev->mr_mtx);
+ _mlx5_vdpa_put_mr(mvdev, mr);
+ mutex_unlock(&mvdev->mr_mtx);
+}
- _mlx5_vdpa_destroy_mr(mvdev, mr);
+static void _mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev,
+ struct mlx5_vdpa_mr *mr)
+{
+ if (!mr)
+ return;
- for (int i = 0; i < MLX5_VDPA_NUM_AS; i++) {
- if (mvdev->mr[i] == mr)
- mvdev->mr[i] = NULL;
- }
+ refcount_inc(&mr->refcount);
+}
+void mlx5_vdpa_get_mr(struct mlx5_vdpa_dev *mvdev,
+ struct mlx5_vdpa_mr *mr)
+{
+ mutex_lock(&mvdev->mr_mtx);
+ _mlx5_vdpa_get_mr(mvdev, mr);
mutex_unlock(&mvdev->mr_mtx);
-
- kfree(mr);
}
void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev,
@@ -534,10 +556,23 @@ void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev,
mutex_lock(&mvdev->mr_mtx);
+ _mlx5_vdpa_put_mr(mvdev, old_mr);
mvdev->mr[asid] = new_mr;
- if (old_mr) {
- _mlx5_vdpa_destroy_mr(mvdev, old_mr);
- kfree(old_mr);
+
+ mutex_unlock(&mvdev->mr_mtx);
+}
+
+static void mlx5_vdpa_show_mr_leaks(struct mlx5_vdpa_dev *mvdev)
+{
+ struct mlx5_vdpa_mr *mr;
+
+ mutex_lock(&mvdev->mr_mtx);
+
+ list_for_each_entry(mr, &mvdev->mr_list_head, mr_list) {
+
+ mlx5_vdpa_warn(mvdev, "mkey still alive after resource delete: "
+ "mr: %p, mkey: 0x%x, refcount: %u\n",
+ mr, mr->mkey, refcount_read(&mr->refcount));
}
mutex_unlock(&mvdev->mr_mtx);
@@ -547,9 +582,11 @@ void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev,
void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev)
{
for (int i = 0; i < MLX5_VDPA_NUM_AS; i++)
- mlx5_vdpa_destroy_mr(mvdev, mvdev->mr[i]);
+ mlx5_vdpa_update_mr(mvdev, NULL, i);
prune_iotlb(mvdev->cvq.iotlb);
+
+ mlx5_vdpa_show_mr_leaks(mvdev);
}
static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
@@ -576,6 +613,8 @@ static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
if (err)
goto err_iotlb;
+ list_add_tail(&mr->mr_list, &mvdev->mr_list_head);
+
return 0;
err_iotlb:
@@ -607,6 +646,8 @@ struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
if (err)
goto out_err;
+ refcount_set(&mr->refcount, 1);
+
return mr;
out_err:
@@ -651,7 +692,7 @@ int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
if (asid >= MLX5_VDPA_NUM_AS)
return -EINVAL;
- mlx5_vdpa_destroy_mr(mvdev, mvdev->mr[asid]);
+ mlx5_vdpa_update_mr(mvdev, NULL, asid);
if (asid == 0 && MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
if (mlx5_vdpa_create_dma_mr(mvdev))
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 26ba7da6b410..778821bab7d9 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -120,6 +120,12 @@ struct mlx5_vdpa_virtqueue {
u16 avail_idx;
u16 used_idx;
int fw_state;
+
+ u64 modified_fields;
+
+ struct mlx5_vdpa_mr *vq_mr;
+ struct mlx5_vdpa_mr *desc_mr;
+
struct msi_map map;
/* keep last in the struct */
@@ -943,6 +949,14 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
kfree(in);
mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ mlx5_vdpa_get_mr(mvdev, vq_mr);
+ mvq->vq_mr = vq_mr;
+
+ if (vq_desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) {
+ mlx5_vdpa_get_mr(mvdev, vq_desc_mr);
+ mvq->desc_mr = vq_desc_mr;
+ }
+
return 0;
err_cmd:
@@ -969,6 +983,12 @@ static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtq
}
mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
umems_destroy(ndev, mvq);
+
+ mlx5_vdpa_put_mr(&ndev->mvdev, mvq->vq_mr);
+ mvq->vq_mr = NULL;
+
+ mlx5_vdpa_put_mr(&ndev->mvdev, mvq->desc_mr);
+ mvq->desc_mr = NULL;
}
static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
@@ -1167,7 +1187,12 @@ err_cmd:
return err;
}
-static bool is_valid_state_change(int oldstate, int newstate)
+static bool is_resumable(struct mlx5_vdpa_net *ndev)
+{
+ return ndev->mvdev.vdev.config->resume;
+}
+
+static bool is_valid_state_change(int oldstate, int newstate, bool resumable)
{
switch (oldstate) {
case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
@@ -1175,25 +1200,43 @@ static bool is_valid_state_change(int oldstate, int newstate)
case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
+ return resumable ? newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY : false;
case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
default:
return false;
}
}
-static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
+static bool modifiable_virtqueue_fields(struct mlx5_vdpa_virtqueue *mvq)
+{
+ /* Only state is always modifiable */
+ if (mvq->modified_fields & ~MLX5_VIRTQ_MODIFY_MASK_STATE)
+ return mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT ||
+ mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
+
+ return true;
+}
+
+static int modify_virtqueue(struct mlx5_vdpa_net *ndev,
+ struct mlx5_vdpa_virtqueue *mvq,
+ int state)
{
int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
+ struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
+ struct mlx5_vdpa_mr *desc_mr = NULL;
+ struct mlx5_vdpa_mr *vq_mr = NULL;
+ bool state_change = false;
void *obj_context;
void *cmd_hdr;
+ void *vq_ctx;
void *in;
int err;
if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
return 0;
- if (!is_valid_state_change(mvq->fw_state, state))
+ if (!modifiable_virtqueue_fields(mvq))
return -EINVAL;
in = kzalloc(inlen, GFP_KERNEL);
@@ -1208,17 +1251,83 @@ static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
- MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
- MLX5_VIRTQ_MODIFY_MASK_STATE);
- MLX5_SET(virtio_net_q_object, obj_context, state, state);
+ vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
+
+ if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE) {
+ if (!is_valid_state_change(mvq->fw_state, state, is_resumable(ndev))) {
+ err = -EINVAL;
+ goto done;
+ }
+
+ MLX5_SET(virtio_net_q_object, obj_context, state, state);
+ state_change = true;
+ }
+
+ if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS) {
+ MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
+ MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
+ MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
+ }
+
+ if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX)
+ MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
+
+ if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX)
+ MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
+
+ if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
+ vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]];
+
+ if (vq_mr)
+ MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
+ else
+ mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY;
+ }
+
+ if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
+ desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
+
+ if (desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
+ MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, desc_mr->mkey);
+ else
+ mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
+ }
+
+ MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, mvq->modified_fields);
err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
- kfree(in);
- if (!err)
+ if (err)
+ goto done;
+
+ if (state_change)
mvq->fw_state = state;
+ if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
+ mlx5_vdpa_put_mr(mvdev, mvq->vq_mr);
+ mlx5_vdpa_get_mr(mvdev, vq_mr);
+ mvq->vq_mr = vq_mr;
+ }
+
+ if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
+ mlx5_vdpa_put_mr(mvdev, mvq->desc_mr);
+ mlx5_vdpa_get_mr(mvdev, desc_mr);
+ mvq->desc_mr = desc_mr;
+ }
+
+ mvq->modified_fields = 0;
+
+done:
+ kfree(in);
return err;
}
+static int modify_virtqueue_state(struct mlx5_vdpa_net *ndev,
+ struct mlx5_vdpa_virtqueue *mvq,
+ unsigned int state)
+{
+ mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_STATE;
+ return modify_virtqueue(ndev, mvq, state);
+}
+
static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
@@ -1347,7 +1456,7 @@ static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
goto err_vq;
if (mvq->ready) {
- err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
+ err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
if (err) {
mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
idx, err);
@@ -1382,7 +1491,7 @@ static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *m
if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
return;
- if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
+ if (modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
if (query_virtqueue(ndev, mvq, &attr)) {
@@ -1401,12 +1510,31 @@ static void suspend_vqs(struct mlx5_vdpa_net *ndev)
suspend_vq(ndev, &ndev->vqs[i]);
}
+static void resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
+{
+ if (!mvq->initialized || !is_resumable(ndev))
+ return;
+
+ if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND)
+ return;
+
+ if (modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY))
+ mlx5_vdpa_warn(&ndev->mvdev, "modify to resume failed for vq %u\n", mvq->index);
+}
+
+static void resume_vqs(struct mlx5_vdpa_net *ndev)
+{
+ for (int i = 0; i < ndev->mvdev.max_vqs; i++)
+ resume_vq(ndev, &ndev->vqs[i]);
+}
+
static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
if (!mvq->initialized)
return;
suspend_vq(ndev, mvq);
+ mvq->modified_fields = 0;
destroy_virtqueue(ndev, mvq);
dealloc_vector(ndev, mvq);
counter_set_dealloc(ndev, mvq);
@@ -2138,6 +2266,7 @@ static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_
mvq->desc_addr = desc_area;
mvq->device_addr = device_area;
mvq->driver_addr = driver_area;
+ mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS;
return 0;
}
@@ -2207,7 +2336,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
if (!ready) {
suspend_vq(ndev, mvq);
} else {
- err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
+ err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
if (err) {
mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
ready = false;
@@ -2255,6 +2384,8 @@ static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
mvq->used_idx = state->split.avail_index;
mvq->avail_idx = state->split.avail_index;
+ mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX |
+ MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX;
return 0;
}
@@ -2703,24 +2834,35 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
unsigned int asid)
{
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+ bool teardown = !is_resumable(ndev);
int err;
suspend_vqs(ndev);
- err = save_channels_info(ndev);
- if (err)
- return err;
+ if (teardown) {
+ err = save_channels_info(ndev);
+ if (err)
+ return err;
- teardown_driver(ndev);
+ teardown_driver(ndev);
+ }
mlx5_vdpa_update_mr(mvdev, new_mr, asid);
+ for (int i = 0; i < ndev->cur_num_vqs; i++)
+ ndev->vqs[i].modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY |
+ MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
+
if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended)
return 0;
- restore_channels_info(ndev);
- err = setup_driver(mvdev);
- if (err)
- return err;
+ if (teardown) {
+ restore_channels_info(ndev);
+ err = setup_driver(mvdev);
+ if (err)
+ return err;
+ }
+
+ resume_vqs(ndev);
return 0;
}
@@ -2804,8 +2946,10 @@ static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
{
int i;
- for (i = 0; i < ndev->mvdev.max_vqs; i++)
+ for (i = 0; i < ndev->mvdev.max_vqs; i++) {
ndev->vqs[i].ready = false;
+ ndev->vqs[i].modified_fields = 0;
+ }
ndev->mvdev.cvq.ready = false;
}
@@ -2982,7 +3126,7 @@ static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
return mlx5_vdpa_update_cvq_iotlb(mvdev, iotlb, asid);
out_err:
- mlx5_vdpa_destroy_mr(mvdev, new_mr);
+ mlx5_vdpa_put_mr(mvdev, new_mr);
return err;
}
@@ -3229,6 +3373,23 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
return 0;
}
+static int mlx5_vdpa_resume(struct vdpa_device *vdev)
+{
+ struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+ struct mlx5_vdpa_net *ndev;
+
+ ndev = to_mlx5_vdpa_ndev(mvdev);
+
+ mlx5_vdpa_info(mvdev, "resuming device\n");
+
+ down_write(&ndev->reslock);
+ mvdev->suspended = false;
+ resume_vqs(ndev);
+ register_link_notifier(ndev);
+ up_write(&ndev->reslock);
+ return 0;
+}
+
static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
unsigned int asid)
{
@@ -3285,6 +3446,7 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
.get_vq_dma_dev = mlx5_get_vq_dma_dev,
.free = mlx5_vdpa_free,
.suspend = mlx5_vdpa_suspend,
+ .resume = mlx5_vdpa_resume, /* Op disabled if not supported. */
};
static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
@@ -3560,6 +3722,8 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
if (err)
goto err_mpfs;
+ INIT_LIST_HEAD(&mvdev->mr_list_head);
+
if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
err = mlx5_vdpa_create_dma_mr(mvdev);
if (err)
@@ -3656,6 +3820,9 @@ static int mlx5v_probe(struct auxiliary_device *adev,
if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, desc_group_mkey_supported))
mgtdev->vdpa_ops.get_vq_desc_group = NULL;
+ if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, freeze_to_rdy_supported))
+ mgtdev->vdpa_ops.resume = NULL;
+
err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
if (err)
goto reg_err;
diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
index a7612e0783b3..d0695680b282 100644
--- a/drivers/vdpa/vdpa.c
+++ b/drivers/vdpa/vdpa.c
@@ -131,7 +131,7 @@ static void vdpa_release_dev(struct device *d)
if (ops->free)
ops->free(vdev);
- ida_simple_remove(&vdpa_index_ida, vdev->index);
+ ida_free(&vdpa_index_ida, vdev->index);
kfree(vdev->driver_override);
kfree(vdev);
}
@@ -205,7 +205,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
return vdev;
err_name:
- ida_simple_remove(&vdpa_index_ida, vdev->index);
+ ida_free(&vdpa_index_ida, vdev->index);
err_ida:
kfree(vdev);
err:
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 173beda74b38..bc4a51e4638b 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -59,6 +59,7 @@ struct vhost_vdpa {
int in_batch;
struct vdpa_iova_range range;
u32 batch_asid;
+ bool suspended;
};
static DEFINE_IDA(vhost_vdpa_ida);
@@ -232,6 +233,8 @@ static int _compat_vdpa_reset(struct vhost_vdpa *v)
struct vdpa_device *vdpa = v->vdpa;
u32 flags = 0;
+ v->suspended = false;
+
if (v->vdev.vqs) {
flags |= !vhost_backend_has_feature(v->vdev.vqs[0],
VHOST_BACKEND_F_IOTLB_PERSIST) ?
@@ -590,11 +593,16 @@ static long vhost_vdpa_suspend(struct vhost_vdpa *v)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
+ int ret;
if (!ops->suspend)
return -EOPNOTSUPP;
- return ops->suspend(vdpa);
+ ret = ops->suspend(vdpa);
+ if (!ret)
+ v->suspended = true;
+
+ return ret;
}
/* After a successful return of this ioctl the device resumes processing
@@ -605,11 +613,16 @@ static long vhost_vdpa_resume(struct vhost_vdpa *v)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
+ int ret;
if (!ops->resume)
return -EOPNOTSUPP;
- return ops->resume(vdpa);
+ ret = ops->resume(vdpa);
+ if (!ret)
+ v->suspended = false;
+
+ return ret;
}
static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
@@ -690,6 +703,9 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
switch (cmd) {
case VHOST_SET_VRING_ADDR:
+ if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended)
+ return -EINVAL;
+
if (ops->set_vq_address(vdpa, idx,
(u64)(uintptr_t)vq->desc,
(u64)(uintptr_t)vq->avail,
@@ -698,6 +714,9 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
break;
case VHOST_SET_VRING_BASE:
+ if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_DRIVER_OK) && !v->suspended)
+ return -EINVAL;
+
if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff;
vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000);
@@ -968,7 +987,8 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
r = ops->set_map(vdpa, asid, iotlb);
} else {
r = iommu_map(v->domain, iova, pa, size,
- perm_to_iommu_flags(perm), GFP_KERNEL);
+ perm_to_iommu_flags(perm),
+ GFP_KERNEL_ACCOUNT);
}
if (r) {
vhost_iotlb_del_range(iotlb, iova, iova + size - 1);
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 59cdc0292dce..1f5b3dd31fcf 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -119,6 +119,11 @@ struct virtio_balloon {
/* Free page reporting device */
struct virtqueue *reporting_vq;
struct page_reporting_dev_info pr_dev_info;
+
+ /* State for keeping the wakeup_source active while adjusting the balloon */
+ spinlock_t adjustment_lock;
+ bool adjustment_signal_pending;
+ bool adjustment_in_progress;
};
static const struct virtio_device_id id_table[] = {
@@ -437,6 +442,31 @@ static void virtio_balloon_queue_free_page_work(struct virtio_balloon *vb)
queue_work(vb->balloon_wq, &vb->report_free_page_work);
}
+static void start_update_balloon_size(struct virtio_balloon *vb)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&vb->adjustment_lock, flags);
+ vb->adjustment_signal_pending = true;
+ if (!vb->adjustment_in_progress) {
+ vb->adjustment_in_progress = true;
+ pm_stay_awake(vb->vdev->dev.parent);
+ }
+ spin_unlock_irqrestore(&vb->adjustment_lock, flags);
+
+ queue_work(system_freezable_wq, &vb->update_balloon_size_work);
+}
+
+static void end_update_balloon_size(struct virtio_balloon *vb)
+{
+ spin_lock_irq(&vb->adjustment_lock);
+ if (!vb->adjustment_signal_pending && vb->adjustment_in_progress) {
+ vb->adjustment_in_progress = false;
+ pm_relax(vb->vdev->dev.parent);
+ }
+ spin_unlock_irq(&vb->adjustment_lock);
+}
+
static void virtballoon_changed(struct virtio_device *vdev)
{
struct virtio_balloon *vb = vdev->priv;
@@ -444,8 +474,7 @@ static void virtballoon_changed(struct virtio_device *vdev)
spin_lock_irqsave(&vb->stop_update_lock, flags);
if (!vb->stop_update) {
- queue_work(system_freezable_wq,
- &vb->update_balloon_size_work);
+ start_update_balloon_size(vb);
virtio_balloon_queue_free_page_work(vb);
}
spin_unlock_irqrestore(&vb->stop_update_lock, flags);
@@ -476,19 +505,25 @@ static void update_balloon_size_func(struct work_struct *work)
vb = container_of(work, struct virtio_balloon,
update_balloon_size_work);
- diff = towards_target(vb);
- if (!diff)
- return;
+ spin_lock_irq(&vb->adjustment_lock);
+ vb->adjustment_signal_pending = false;
+ spin_unlock_irq(&vb->adjustment_lock);
- if (diff > 0)
- diff -= fill_balloon(vb, diff);
- else
- diff += leak_balloon(vb, -diff);
- update_balloon_size(vb);
+ diff = towards_target(vb);
+
+ if (diff) {
+ if (diff > 0)
+ diff -= fill_balloon(vb, diff);
+ else
+ diff += leak_balloon(vb, -diff);
+ update_balloon_size(vb);
+ }
if (diff)
queue_work(system_freezable_wq, work);
+ else
+ end_update_balloon_size(vb);
}
static int init_vqs(struct virtio_balloon *vb)
@@ -992,6 +1027,8 @@ static int virtballoon_probe(struct virtio_device *vdev)
goto out_unregister_oom;
}
+ spin_lock_init(&vb->adjustment_lock);
+
virtio_device_ready(vdev);
if (towards_target(vb))
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 9f93330fc2cc..b655fccaf773 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -495,8 +495,40 @@ static int virtio_pci_restore(struct device *dev)
return virtio_device_restore(&vp_dev->vdev);
}
+static bool vp_supports_pm_no_reset(struct device *dev)
+{
+ struct pci_dev *pci_dev = to_pci_dev(dev);
+ u16 pmcsr;
+
+ if (!pci_dev->pm_cap)
+ return false;
+
+ pci_read_config_word(pci_dev, pci_dev->pm_cap + PCI_PM_CTRL, &pmcsr);
+ if (PCI_POSSIBLE_ERROR(pmcsr)) {
+ dev_err(dev, "Unable to query pmcsr");
+ return false;
+ }
+
+ return pmcsr & PCI_PM_CTRL_NO_SOFT_RESET;
+}
+
+static int virtio_pci_suspend(struct device *dev)
+{
+ return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_freeze(dev);
+}
+
+static int virtio_pci_resume(struct device *dev)
+{
+ return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_restore(dev);
+}
+
static const struct dev_pm_ops virtio_pci_pm_ops = {
- SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore)
+ .suspend = virtio_pci_suspend,
+ .resume = virtio_pci_resume,
+ .freeze = virtio_pci_freeze,
+ .thaw = virtio_pci_restore,
+ .poweroff = virtio_pci_freeze,
+ .restore = virtio_pci_restore,
};
#endif