summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-07-03 15:38:26 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-07-03 15:38:26 -0700
commita8d70602b186f3c347e62c59a418be802b71886d (patch)
tree48bf9b05703ff824a4dddfaaa773687c9fe6fd05 /include
parente8069f5a8e3bdb5fdeeff895780529388592ee7a (diff)
parent9e396a2f434f829fb3b98a24bb8db5429320589d (diff)
downloadlwn-a8d70602b186f3c347e62c59a418be802b71886d.tar.gz
lwn-a8d70602b186f3c347e62c59a418be802b71886d.zip
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio updates from Michael Tsirkin: - resume support in vdpa/solidrun - structure size optimizations in virtio_pci - new pds_vdpa driver - immediate initialization mechanism for vdpa/ifcvf - interrupt bypass for vdpa/mlx5 - multiple worker support for vhost - virtio net in Intel F2000X-PL support for vdpa/ifcvf - fixes, cleanups all over the place * tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (48 commits) vhost: Make parameter name match of vhost_get_vq_desc() vduse: fix NULL pointer dereference vhost: Allow worker switching while work is queueing vhost_scsi: add support for worker ioctls vhost: allow userspace to create workers vhost: replace single worker pointer with xarray vhost: add helper to parse userspace vring state/file vhost: remove vhost_work_queue vhost_scsi: flush IO vqs then send TMF rsp vhost_scsi: convert to vhost_vq_work_queue vhost_scsi: make SCSI cmd completion per vq vhost_sock: convert to vhost_vq_work_queue vhost: convert poll work to be vq based vhost: take worker or vq for flushing vhost: take worker or vq instead of dev for queueing vhost, vhost_net: add helper to check if vq has work vhost: add vhost_worker pointer to vhost_virtqueue vhost: dynamically allocate vhost_worker vhost: create worker at end of vhost_dev_set_owner virtio_bt: call scheduler when we free unused buffs ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/pds/pds_adminq.h247
-rw-r--r--include/linux/pds/pds_common.h21
-rw-r--r--include/linux/virtio.h5
-rw-r--r--include/linux/virtio_pci_modern.h6
-rw-r--r--include/uapi/linux/vhost.h31
-rw-r--r--include/uapi/linux/vhost_types.h16
6 files changed, 305 insertions, 21 deletions
diff --git a/include/linux/pds/pds_adminq.h b/include/linux/pds/pds_adminq.h
index 98a60ce87b92..bcba7fda3cc9 100644
--- a/include/linux/pds/pds_adminq.h
+++ b/include/linux/pds/pds_adminq.h
@@ -222,6 +222,27 @@ enum pds_core_lif_type {
PDS_CORE_LIF_TYPE_DEFAULT = 0,
};
+#define PDS_CORE_IFNAMSIZ 16
+
+/**
+ * enum pds_core_logical_qtype - Logical Queue Types
+ * @PDS_CORE_QTYPE_ADMINQ: Administrative Queue
+ * @PDS_CORE_QTYPE_NOTIFYQ: Notify Queue
+ * @PDS_CORE_QTYPE_RXQ: Receive Queue
+ * @PDS_CORE_QTYPE_TXQ: Transmit Queue
+ * @PDS_CORE_QTYPE_EQ: Event Queue
+ * @PDS_CORE_QTYPE_MAX: Max queue type supported
+ */
+enum pds_core_logical_qtype {
+ PDS_CORE_QTYPE_ADMINQ = 0,
+ PDS_CORE_QTYPE_NOTIFYQ = 1,
+ PDS_CORE_QTYPE_RXQ = 2,
+ PDS_CORE_QTYPE_TXQ = 3,
+ PDS_CORE_QTYPE_EQ = 4,
+
+ PDS_CORE_QTYPE_MAX = 16 /* don't change - used in struct size */
+};
+
/**
* union pds_core_lif_config - LIF configuration
* @state: LIF state (enum pds_core_lif_state)
@@ -584,6 +605,219 @@ struct pds_core_q_init_comp {
u8 color;
};
+/*
+ * enum pds_vdpa_cmd_opcode - vDPA Device commands
+ */
+enum pds_vdpa_cmd_opcode {
+ PDS_VDPA_CMD_INIT = 48,
+ PDS_VDPA_CMD_IDENT = 49,
+ PDS_VDPA_CMD_RESET = 51,
+ PDS_VDPA_CMD_VQ_RESET = 52,
+ PDS_VDPA_CMD_VQ_INIT = 53,
+ PDS_VDPA_CMD_STATUS_UPDATE = 54,
+ PDS_VDPA_CMD_SET_FEATURES = 55,
+ PDS_VDPA_CMD_SET_ATTR = 56,
+};
+
+/**
+ * struct pds_vdpa_cmd - generic command
+ * @opcode: Opcode
+ * @vdpa_index: Index for vdpa subdevice
+ * @vf_id: VF id
+ */
+struct pds_vdpa_cmd {
+ u8 opcode;
+ u8 vdpa_index;
+ __le16 vf_id;
+};
+
+/**
+ * struct pds_vdpa_init_cmd - INIT command
+ * @opcode: Opcode PDS_VDPA_CMD_INIT
+ * @vdpa_index: Index for vdpa subdevice
+ * @vf_id: VF id
+ */
+struct pds_vdpa_init_cmd {
+ u8 opcode;
+ u8 vdpa_index;
+ __le16 vf_id;
+};
+
+/**
+ * struct pds_vdpa_ident - vDPA identification data
+ * @hw_features: vDPA features supported by device
+ * @max_vqs: max queues available (2 queues for a single queuepair)
+ * @max_qlen: log(2) of maximum number of descriptors
+ * @min_qlen: log(2) of minimum number of descriptors
+ *
+ * This struct is used in a DMA block that is set up for the PDS_VDPA_CMD_IDENT
+ * transaction. Set up the DMA block and send the address in the IDENT cmd
+ * data, the DSC will write the ident information, then we can remove the DMA
+ * block after reading the answer. If the completion status is 0, then there
+ * is valid information, else there was an error and the data should be treated as invalid.
+ */
+struct pds_vdpa_ident {
+ __le64 hw_features;
+ __le16 max_vqs;
+ __le16 max_qlen;
+ __le16 min_qlen;
+};
+
+/**
+ * struct pds_vdpa_ident_cmd - IDENT command
+ * @opcode: Opcode PDS_VDPA_CMD_IDENT
+ * @rsvd: Word boundary padding
+ * @vf_id: VF id
+ * @len: length of ident info DMA space
+ * @ident_pa: address for DMA of ident info (struct pds_vdpa_ident)
+ * only used for this transaction, then forgotten by DSC
+ */
+struct pds_vdpa_ident_cmd {
+ u8 opcode;
+ u8 rsvd;
+ __le16 vf_id;
+ __le32 len;
+ __le64 ident_pa;
+};
+
+/**
+ * struct pds_vdpa_status_cmd - STATUS_UPDATE command
+ * @opcode: Opcode PDS_VDPA_CMD_STATUS_UPDATE
+ * @vdpa_index: Index for vdpa subdevice
+ * @vf_id: VF id
+ * @status: new status bits
+ */
+struct pds_vdpa_status_cmd {
+ u8 opcode;
+ u8 vdpa_index;
+ __le16 vf_id;
+ u8 status;
+};
+
+/**
+ * enum pds_vdpa_attr - List of VDPA device attributes
+ * @PDS_VDPA_ATTR_MAC: MAC address
+ * @PDS_VDPA_ATTR_MAX_VQ_PAIRS: Max virtqueue pairs
+ */
+enum pds_vdpa_attr {
+ PDS_VDPA_ATTR_MAC = 1,
+ PDS_VDPA_ATTR_MAX_VQ_PAIRS = 2,
+};
+
+/**
+ * struct pds_vdpa_setattr_cmd - SET_ATTR command
+ * @opcode: Opcode PDS_VDPA_CMD_SET_ATTR
+ * @vdpa_index: Index for vdpa subdevice
+ * @vf_id: VF id
+ * @attr: attribute to be changed (enum pds_vdpa_attr)
+ * @pad: Word boundary padding
+ * @mac: new mac address to be assigned as vdpa device address
+ * @max_vq_pairs: new limit of virtqueue pairs
+ */
+struct pds_vdpa_setattr_cmd {
+ u8 opcode;
+ u8 vdpa_index;
+ __le16 vf_id;
+ u8 attr;
+ u8 pad[3];
+ union {
+ u8 mac[6];
+ __le16 max_vq_pairs;
+ } __packed;
+};
+
+/**
+ * struct pds_vdpa_vq_init_cmd - queue init command
+ * @opcode: Opcode PDS_VDPA_CMD_VQ_INIT
+ * @vdpa_index: Index for vdpa subdevice
+ * @vf_id: VF id
+ * @qid: Queue id (bit0 clear = rx, bit0 set = tx, qid=N is ctrlq)
+ * @len: log(2) of max descriptor count
+ * @desc_addr: DMA address of descriptor area
+ * @avail_addr: DMA address of available descriptors (aka driver area)
+ * @used_addr: DMA address of used descriptors (aka device area)
+ * @intr_index: interrupt index
+ * @avail_index: initial device position in available ring
+ * @used_index: initial device position in used ring
+ */
+struct pds_vdpa_vq_init_cmd {
+ u8 opcode;
+ u8 vdpa_index;
+ __le16 vf_id;
+ __le16 qid;
+ __le16 len;
+ __le64 desc_addr;
+ __le64 avail_addr;
+ __le64 used_addr;
+ __le16 intr_index;
+ __le16 avail_index;
+ __le16 used_index;
+};
+
+/**
+ * struct pds_vdpa_vq_init_comp - queue init completion
+ * @status: Status of the command (enum pds_core_status_code)
+ * @hw_qtype: HW queue type, used in doorbell selection
+ * @hw_qindex: HW queue index, used in doorbell selection
+ * @rsvd: Word boundary padding
+ * @color: Color bit
+ */
+struct pds_vdpa_vq_init_comp {
+ u8 status;
+ u8 hw_qtype;
+ __le16 hw_qindex;
+ u8 rsvd[11];
+ u8 color;
+};
+
+/**
+ * struct pds_vdpa_vq_reset_cmd - queue reset command
+ * @opcode: Opcode PDS_VDPA_CMD_VQ_RESET
+ * @vdpa_index: Index for vdpa subdevice
+ * @vf_id: VF id
+ * @qid: Queue id
+ */
+struct pds_vdpa_vq_reset_cmd {
+ u8 opcode;
+ u8 vdpa_index;
+ __le16 vf_id;
+ __le16 qid;
+};
+
+/**
+ * struct pds_vdpa_vq_reset_comp - queue reset completion
+ * @status: Status of the command (enum pds_core_status_code)
+ * @rsvd0: Word boundary padding
+ * @avail_index: current device position in available ring
+ * @used_index: current device position in used ring
+ * @rsvd: Word boundary padding
+ * @color: Color bit
+ */
+struct pds_vdpa_vq_reset_comp {
+ u8 status;
+ u8 rsvd0;
+ __le16 avail_index;
+ __le16 used_index;
+ u8 rsvd[9];
+ u8 color;
+};
+
+/**
+ * struct pds_vdpa_set_features_cmd - set hw features
+ * @opcode: Opcode PDS_VDPA_CMD_SET_FEATURES
+ * @vdpa_index: Index for vdpa subdevice
+ * @vf_id: VF id
+ * @rsvd: Word boundary padding
+ * @features: Feature bit mask
+ */
+struct pds_vdpa_set_features_cmd {
+ u8 opcode;
+ u8 vdpa_index;
+ __le16 vf_id;
+ __le32 rsvd;
+ __le64 features;
+};
+
union pds_core_adminq_cmd {
u8 opcode;
u8 bytes[64];
@@ -600,6 +834,16 @@ union pds_core_adminq_cmd {
struct pds_core_q_identify_cmd q_ident;
struct pds_core_q_init_cmd q_init;
+
+ struct pds_vdpa_cmd vdpa;
+ struct pds_vdpa_init_cmd vdpa_init;
+ struct pds_vdpa_ident_cmd vdpa_ident;
+ struct pds_vdpa_status_cmd vdpa_status;
+ struct pds_vdpa_setattr_cmd vdpa_setattr;
+ struct pds_vdpa_set_features_cmd vdpa_set_features;
+ struct pds_vdpa_vq_init_cmd vdpa_vq_init;
+ struct pds_vdpa_vq_reset_cmd vdpa_vq_reset;
+
};
union pds_core_adminq_comp {
@@ -621,6 +865,9 @@ union pds_core_adminq_comp {
struct pds_core_q_identify_comp q_ident;
struct pds_core_q_init_comp q_init;
+
+ struct pds_vdpa_vq_init_comp vdpa_vq_init;
+ struct pds_vdpa_vq_reset_comp vdpa_vq_reset;
};
#ifndef __CHECKER__
diff --git a/include/linux/pds/pds_common.h b/include/linux/pds/pds_common.h
index 060331486d50..435c8e8161c2 100644
--- a/include/linux/pds/pds_common.h
+++ b/include/linux/pds/pds_common.h
@@ -39,26 +39,7 @@ enum pds_core_vif_types {
#define PDS_DEV_TYPE_RDMA_STR "RDMA"
#define PDS_DEV_TYPE_LM_STR "LM"
-#define PDS_CORE_IFNAMSIZ 16
-
-/**
- * enum pds_core_logical_qtype - Logical Queue Types
- * @PDS_CORE_QTYPE_ADMINQ: Administrative Queue
- * @PDS_CORE_QTYPE_NOTIFYQ: Notify Queue
- * @PDS_CORE_QTYPE_RXQ: Receive Queue
- * @PDS_CORE_QTYPE_TXQ: Transmit Queue
- * @PDS_CORE_QTYPE_EQ: Event Queue
- * @PDS_CORE_QTYPE_MAX: Max queue type supported
- */
-enum pds_core_logical_qtype {
- PDS_CORE_QTYPE_ADMINQ = 0,
- PDS_CORE_QTYPE_NOTIFYQ = 1,
- PDS_CORE_QTYPE_RXQ = 2,
- PDS_CORE_QTYPE_TXQ = 3,
- PDS_CORE_QTYPE_EQ = 4,
-
- PDS_CORE_QTYPE_MAX = 16 /* don't change - used in struct size */
-};
+#define PDS_VDPA_DEV_NAME PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_VDPA_STR
int pdsc_register_notify(struct notifier_block *nb);
void pdsc_unregister_notify(struct notifier_block *nb);
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index b93238db94e3..de6041deee37 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -103,6 +103,7 @@ int virtqueue_resize(struct virtqueue *vq, u32 num,
* @config_enabled: configuration change reporting enabled
* @config_change_pending: configuration change reported while disabled
* @config_lock: protects configuration change reporting
+ * @vqs_list_lock: protects @vqs.
* @dev: underlying device.
* @id: the device type identification (used to match it with a driver).
* @config: the configuration ops for this device.
@@ -117,7 +118,7 @@ struct virtio_device {
bool config_enabled;
bool config_change_pending;
spinlock_t config_lock;
- spinlock_t vqs_list_lock; /* Protects VQs list access */
+ spinlock_t vqs_list_lock;
struct device dev;
struct virtio_device_id id;
const struct virtio_config_ops *config;
@@ -160,6 +161,8 @@ size_t virtio_max_dma_size(const struct virtio_device *vdev);
* @feature_table_size: number of entries in the feature table array.
* @feature_table_legacy: same as feature_table but when working in legacy mode.
* @feature_table_size_legacy: number of entries in feature table legacy array.
+ * @validate: the function to call to validate features and config space.
+ * Returns 0 or -errno.
* @probe: the function to call when a device is found. Returns 0 or -errno.
* @scan: optional function to call after successful probe; intended
* for virtio-scsi to invoke a scan.
diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h
index c4eeb79b0139..067ac1d789bc 100644
--- a/include/linux/virtio_pci_modern.h
+++ b/include/linux/virtio_pci_modern.h
@@ -38,6 +38,12 @@ struct virtio_pci_modern_device {
int modern_bars;
struct virtio_device_id id;
+
+ /* optional check for vendor virtio device, returns dev_id or -ERRNO */
+ int (*device_id_check)(struct pci_dev *pdev);
+
+ /* optional mask for devices with limited DMA space */
+ u64 dma_mask;
};
/*
diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index 92e1b700b51c..f5c48b61ab62 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -45,6 +45,25 @@
#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64)
/* Specify an eventfd file descriptor to signal on log write. */
#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int)
+/* By default, a device gets one vhost_worker that its virtqueues share. This
+ * command allows the owner of the device to create an additional vhost_worker
+ * for the device. It can later be bound to 1 or more of its virtqueues using
+ * the VHOST_ATTACH_VRING_WORKER command.
+ *
+ * This must be called after VHOST_SET_OWNER and the caller must be the owner
+ * of the device. The new thread will inherit caller's cgroups and namespaces,
+ * and will share the caller's memory space. The new thread will also be
+ * counted against the caller's RLIMIT_NPROC value.
+ *
+ * The worker's ID used in other commands will be returned in
+ * vhost_worker_state.
+ */
+#define VHOST_NEW_WORKER _IOR(VHOST_VIRTIO, 0x8, struct vhost_worker_state)
+/* Free a worker created with VHOST_NEW_WORKER if it's not attached to any
+ * virtqueue. If userspace is not able to call this for workers it created,
+ * the kernel will free all the device's workers when the device is closed.
+ */
+#define VHOST_FREE_WORKER _IOW(VHOST_VIRTIO, 0x9, struct vhost_worker_state)
/* Ring setup. */
/* Set number of descriptors in ring. This parameter can not
@@ -70,6 +89,18 @@
#define VHOST_VRING_BIG_ENDIAN 1
#define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
#define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
+/* Attach a vhost_worker created with VHOST_NEW_WORKER to one of the device's
+ * virtqueues.
+ *
+ * This will replace the virtqueue's existing worker. If the replaced worker
+ * is no longer attached to any virtqueues, it can be freed with
+ * VHOST_FREE_WORKER.
+ */
+#define VHOST_ATTACH_VRING_WORKER _IOW(VHOST_VIRTIO, 0x15, \
+ struct vhost_vring_worker)
+/* Return the vring worker's ID */
+#define VHOST_GET_VRING_WORKER _IOWR(VHOST_VIRTIO, 0x16, \
+ struct vhost_vring_worker)
/* The following ioctls use eventfd file descriptors to signal and poll
* for events. */
diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h
index c5690a8992d8..d3aad12ad1fa 100644
--- a/include/uapi/linux/vhost_types.h
+++ b/include/uapi/linux/vhost_types.h
@@ -47,6 +47,22 @@ struct vhost_vring_addr {
__u64 log_guest_addr;
};
+struct vhost_worker_state {
+ /*
+ * For VHOST_NEW_WORKER the kernel will return the new vhost_worker id.
+ * For VHOST_FREE_WORKER this must be set to the id of the vhost_worker
+ * to free.
+ */
+ unsigned int worker_id;
+};
+
+struct vhost_vring_worker {
+ /* vring index */
+ unsigned int index;
+ /* The id of the vhost_worker returned from VHOST_NEW_WORKER */
+ unsigned int worker_id;
+};
+
/* no alignment requirement */
struct vhost_iotlb_msg {
__u64 iova;