diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-07-03 15:38:26 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-07-03 15:38:26 -0700 |
commit | a8d70602b186f3c347e62c59a418be802b71886d (patch) | |
tree | 48bf9b05703ff824a4dddfaaa773687c9fe6fd05 /include | |
parent | e8069f5a8e3bdb5fdeeff895780529388592ee7a (diff) | |
parent | 9e396a2f434f829fb3b98a24bb8db5429320589d (diff) | |
download | lwn-a8d70602b186f3c347e62c59a418be802b71886d.tar.gz lwn-a8d70602b186f3c347e62c59a418be802b71886d.zip |
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio updates from Michael Tsirkin:
- resume support in vdpa/solidrun
- structure size optimizations in virtio_pci
- new pds_vdpa driver
- immediate initialization mechanism for vdpa/ifcvf
- interrupt bypass for vdpa/mlx5
- multiple worker support for vhost
- viirtio net in Intel F2000X-PL support for vdpa/ifcvf
- fixes, cleanups all over the place
* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (48 commits)
vhost: Make parameter name match of vhost_get_vq_desc()
vduse: fix NULL pointer dereference
vhost: Allow worker switching while work is queueing
vhost_scsi: add support for worker ioctls
vhost: allow userspace to create workers
vhost: replace single worker pointer with xarray
vhost: add helper to parse userspace vring state/file
vhost: remove vhost_work_queue
vhost_scsi: flush IO vqs then send TMF rsp
vhost_scsi: convert to vhost_vq_work_queue
vhost_scsi: make SCSI cmd completion per vq
vhost_sock: convert to vhost_vq_work_queue
vhost: convert poll work to be vq based
vhost: take worker or vq for flushing
vhost: take worker or vq instead of dev for queueing
vhost, vhost_net: add helper to check if vq has work
vhost: add vhost_worker pointer to vhost_virtqueue
vhost: dynamically allocate vhost_worker
vhost: create worker at end of vhost_dev_set_owner
virtio_bt: call scheduler when we free unused buffs
...
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/pds/pds_adminq.h | 247 | ||||
-rw-r--r-- | include/linux/pds/pds_common.h | 21 | ||||
-rw-r--r-- | include/linux/virtio.h | 5 | ||||
-rw-r--r-- | include/linux/virtio_pci_modern.h | 6 | ||||
-rw-r--r-- | include/uapi/linux/vhost.h | 31 | ||||
-rw-r--r-- | include/uapi/linux/vhost_types.h | 16 |
6 files changed, 305 insertions, 21 deletions
diff --git a/include/linux/pds/pds_adminq.h b/include/linux/pds/pds_adminq.h index 98a60ce87b92..bcba7fda3cc9 100644 --- a/include/linux/pds/pds_adminq.h +++ b/include/linux/pds/pds_adminq.h @@ -222,6 +222,27 @@ enum pds_core_lif_type { PDS_CORE_LIF_TYPE_DEFAULT = 0, }; +#define PDS_CORE_IFNAMSIZ 16 + +/** + * enum pds_core_logical_qtype - Logical Queue Types + * @PDS_CORE_QTYPE_ADMINQ: Administrative Queue + * @PDS_CORE_QTYPE_NOTIFYQ: Notify Queue + * @PDS_CORE_QTYPE_RXQ: Receive Queue + * @PDS_CORE_QTYPE_TXQ: Transmit Queue + * @PDS_CORE_QTYPE_EQ: Event Queue + * @PDS_CORE_QTYPE_MAX: Max queue type supported + */ +enum pds_core_logical_qtype { + PDS_CORE_QTYPE_ADMINQ = 0, + PDS_CORE_QTYPE_NOTIFYQ = 1, + PDS_CORE_QTYPE_RXQ = 2, + PDS_CORE_QTYPE_TXQ = 3, + PDS_CORE_QTYPE_EQ = 4, + + PDS_CORE_QTYPE_MAX = 16 /* don't change - used in struct size */ +}; + /** * union pds_core_lif_config - LIF configuration * @state: LIF state (enum pds_core_lif_state) @@ -584,6 +605,219 @@ struct pds_core_q_init_comp { u8 color; }; +/* + * enum pds_vdpa_cmd_opcode - vDPA Device commands + */ +enum pds_vdpa_cmd_opcode { + PDS_VDPA_CMD_INIT = 48, + PDS_VDPA_CMD_IDENT = 49, + PDS_VDPA_CMD_RESET = 51, + PDS_VDPA_CMD_VQ_RESET = 52, + PDS_VDPA_CMD_VQ_INIT = 53, + PDS_VDPA_CMD_STATUS_UPDATE = 54, + PDS_VDPA_CMD_SET_FEATURES = 55, + PDS_VDPA_CMD_SET_ATTR = 56, +}; + +/** + * struct pds_vdpa_cmd - generic command + * @opcode: Opcode + * @vdpa_index: Index for vdpa subdevice + * @vf_id: VF id + */ +struct pds_vdpa_cmd { + u8 opcode; + u8 vdpa_index; + __le16 vf_id; +}; + +/** + * struct pds_vdpa_init_cmd - INIT command + * @opcode: Opcode PDS_VDPA_CMD_INIT + * @vdpa_index: Index for vdpa subdevice + * @vf_id: VF id + */ +struct pds_vdpa_init_cmd { + u8 opcode; + u8 vdpa_index; + __le16 vf_id; +}; + +/** + * struct pds_vdpa_ident - vDPA identification data + * @hw_features: vDPA features supported by device + * @max_vqs: max queues available (2 queues for a single queuepair) + * @max_qlen: log(2) of maximum number of descriptors + * @min_qlen: log(2) of minimum number of descriptors + * + * This struct is used in a DMA block that is set up for the PDS_VDPA_CMD_IDENT + * transaction. Set up the DMA block and send the address in the IDENT cmd + * data, the DSC will write the ident information, then we can remove the DMA + * block after reading the answer. If the completion status is 0, then there + * is valid information, else there was an error and the data should be invalid. + */ +struct pds_vdpa_ident { + __le64 hw_features; + __le16 max_vqs; + __le16 max_qlen; + __le16 min_qlen; +}; + +/** + * struct pds_vdpa_ident_cmd - IDENT command + * @opcode: Opcode PDS_VDPA_CMD_IDENT + * @rsvd: Word boundary padding + * @vf_id: VF id + * @len: length of ident info DMA space + * @ident_pa: address for DMA of ident info (struct pds_vdpa_ident) + * only used for this transaction, then forgotten by DSC + */ +struct pds_vdpa_ident_cmd { + u8 opcode; + u8 rsvd; + __le16 vf_id; + __le32 len; + __le64 ident_pa; +}; + +/** + * struct pds_vdpa_status_cmd - STATUS_UPDATE command + * @opcode: Opcode PDS_VDPA_CMD_STATUS_UPDATE + * @vdpa_index: Index for vdpa subdevice + * @vf_id: VF id + * @status: new status bits + */ +struct pds_vdpa_status_cmd { + u8 opcode; + u8 vdpa_index; + __le16 vf_id; + u8 status; +}; + +/** + * enum pds_vdpa_attr - List of VDPA device attributes + * @PDS_VDPA_ATTR_MAC: MAC address + * @PDS_VDPA_ATTR_MAX_VQ_PAIRS: Max virtqueue pairs + */ +enum pds_vdpa_attr { + PDS_VDPA_ATTR_MAC = 1, + PDS_VDPA_ATTR_MAX_VQ_PAIRS = 2, +}; + +/** + * struct pds_vdpa_setattr_cmd - SET_ATTR command + * @opcode: Opcode PDS_VDPA_CMD_SET_ATTR + * @vdpa_index: Index for vdpa subdevice + * @vf_id: VF id + * @attr: attribute to be changed (enum pds_vdpa_attr) + * @pad: Word boundary padding + * @mac: new mac address to be assigned as vdpa device address + * @max_vq_pairs: new limit of virtqueue pairs + */ +struct pds_vdpa_setattr_cmd { + u8 opcode; + u8 vdpa_index; + __le16 vf_id; + u8 attr; + u8 pad[3]; + union { + u8 mac[6]; + __le16 max_vq_pairs; + } __packed; +}; + +/** + * struct pds_vdpa_vq_init_cmd - queue init command + * @opcode: Opcode PDS_VDPA_CMD_VQ_INIT + * @vdpa_index: Index for vdpa subdevice + * @vf_id: VF id + * @qid: Queue id (bit0 clear = rx, bit0 set = tx, qid=N is ctrlq) + * @len: log(2) of max descriptor count + * @desc_addr: DMA address of descriptor area + * @avail_addr: DMA address of available descriptors (aka driver area) + * @used_addr: DMA address of used descriptors (aka device area) + * @intr_index: interrupt index + * @avail_index: initial device position in available ring + * @used_index: initial device position in used ring + */ +struct pds_vdpa_vq_init_cmd { + u8 opcode; + u8 vdpa_index; + __le16 vf_id; + __le16 qid; + __le16 len; + __le64 desc_addr; + __le64 avail_addr; + __le64 used_addr; + __le16 intr_index; + __le16 avail_index; + __le16 used_index; +}; + +/** + * struct pds_vdpa_vq_init_comp - queue init completion + * @status: Status of the command (enum pds_core_status_code) + * @hw_qtype: HW queue type, used in doorbell selection + * @hw_qindex: HW queue index, used in doorbell selection + * @rsvd: Word boundary padding + * @color: Color bit + */ +struct pds_vdpa_vq_init_comp { + u8 status; + u8 hw_qtype; + __le16 hw_qindex; + u8 rsvd[11]; + u8 color; +}; + +/** + * struct pds_vdpa_vq_reset_cmd - queue reset command + * @opcode: Opcode PDS_VDPA_CMD_VQ_RESET + * @vdpa_index: Index for vdpa subdevice + * @vf_id: VF id + * @qid: Queue id + */ +struct pds_vdpa_vq_reset_cmd { + u8 opcode; + u8 vdpa_index; + __le16 vf_id; + __le16 qid; +}; + +/** + * struct pds_vdpa_vq_reset_comp - queue reset completion + * @status: Status of the command (enum pds_core_status_code) + * @rsvd0: Word boundary padding + * @avail_index: current device position in available ring + * @used_index: current device position in used ring + * @rsvd: Word boundary padding + * @color: Color bit + */ +struct pds_vdpa_vq_reset_comp { + u8 status; + u8 rsvd0; + __le16 avail_index; + __le16 used_index; + u8 rsvd[9]; + u8 color; +}; + +/** + * struct pds_vdpa_set_features_cmd - set hw features + * @opcode: Opcode PDS_VDPA_CMD_SET_FEATURES + * @vdpa_index: Index for vdpa subdevice + * @vf_id: VF id + * @rsvd: Word boundary padding + * @features: Feature bit mask + */ +struct pds_vdpa_set_features_cmd { + u8 opcode; + u8 vdpa_index; + __le16 vf_id; + __le32 rsvd; + __le64 features; +}; + union pds_core_adminq_cmd { u8 opcode; u8 bytes[64]; @@ -600,6 +834,16 @@ union pds_core_adminq_cmd { struct pds_core_q_identify_cmd q_ident; struct pds_core_q_init_cmd q_init; + + struct pds_vdpa_cmd vdpa; + struct pds_vdpa_init_cmd vdpa_init; + struct pds_vdpa_ident_cmd vdpa_ident; + struct pds_vdpa_status_cmd vdpa_status; + struct pds_vdpa_setattr_cmd vdpa_setattr; + struct pds_vdpa_set_features_cmd vdpa_set_features; + struct pds_vdpa_vq_init_cmd vdpa_vq_init; + struct pds_vdpa_vq_reset_cmd vdpa_vq_reset; + }; union pds_core_adminq_comp { @@ -621,6 +865,9 @@ union pds_core_adminq_comp { struct pds_core_q_identify_comp q_ident; struct pds_core_q_init_comp q_init; + + struct pds_vdpa_vq_init_comp vdpa_vq_init; + struct pds_vdpa_vq_reset_comp vdpa_vq_reset; }; #ifndef __CHECKER__ diff --git a/include/linux/pds/pds_common.h b/include/linux/pds/pds_common.h index 060331486d50..435c8e8161c2 100644 --- a/include/linux/pds/pds_common.h +++ b/include/linux/pds/pds_common.h @@ -39,26 +39,7 @@ enum pds_core_vif_types { #define PDS_DEV_TYPE_RDMA_STR "RDMA" #define PDS_DEV_TYPE_LM_STR "LM" -#define PDS_CORE_IFNAMSIZ 16 - -/** - * enum pds_core_logical_qtype - Logical Queue Types - * @PDS_CORE_QTYPE_ADMINQ: Administrative Queue - * @PDS_CORE_QTYPE_NOTIFYQ: Notify Queue - * @PDS_CORE_QTYPE_RXQ: Receive Queue - * @PDS_CORE_QTYPE_TXQ: Transmit Queue - * @PDS_CORE_QTYPE_EQ: Event Queue - * @PDS_CORE_QTYPE_MAX: Max queue type supported - */ -enum pds_core_logical_qtype { - PDS_CORE_QTYPE_ADMINQ = 0, - PDS_CORE_QTYPE_NOTIFYQ = 1, - PDS_CORE_QTYPE_RXQ = 2, - PDS_CORE_QTYPE_TXQ = 3, - PDS_CORE_QTYPE_EQ = 4, - - PDS_CORE_QTYPE_MAX = 16 /* don't change - used in struct size */ -}; +#define PDS_VDPA_DEV_NAME PDS_CORE_DRV_NAME "." PDS_DEV_TYPE_VDPA_STR int pdsc_register_notify(struct notifier_block *nb); void pdsc_unregister_notify(struct notifier_block *nb); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index b93238db94e3..de6041deee37 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -103,6 +103,7 @@ int virtqueue_resize(struct virtqueue *vq, u32 num, * @config_enabled: configuration change reporting enabled * @config_change_pending: configuration change reported while disabled * @config_lock: protects configuration change reporting + * @vqs_list_lock: protects @vqs. * @dev: underlying device. * @id: the device type identification (used to match it with a driver). * @config: the configuration ops for this device. @@ -117,7 +118,7 @@ struct virtio_device { bool config_enabled; bool config_change_pending; spinlock_t config_lock; - spinlock_t vqs_list_lock; /* Protects VQs list access */ + spinlock_t vqs_list_lock; struct device dev; struct virtio_device_id id; const struct virtio_config_ops *config; @@ -160,6 +161,8 @@ size_t virtio_max_dma_size(const struct virtio_device *vdev); * @feature_table_size: number of entries in the feature table array. * @feature_table_legacy: same as feature_table but when working in legacy mode. * @feature_table_size_legacy: number of entries in feature table legacy array. + * @validate: the function to call to validate features and config space. + * Returns 0 or -errno. * @probe: the function to call when a device is found. Returns 0 or -errno. * @scan: optional function to call after successful probe; intended * for virtio-scsi to invoke a scan. diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h index c4eeb79b0139..067ac1d789bc 100644 --- a/include/linux/virtio_pci_modern.h +++ b/include/linux/virtio_pci_modern.h @@ -38,6 +38,12 @@ struct virtio_pci_modern_device { int modern_bars; struct virtio_device_id id; + + /* optional check for vendor virtio device, returns dev_id or -ERRNO */ + int (*device_id_check)(struct pci_dev *pdev); + + /* optional mask for devices with limited DMA space */ + u64 dma_mask; }; /* diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index 92e1b700b51c..f5c48b61ab62 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -45,6 +45,25 @@ #define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64) /* Specify an eventfd file descriptor to signal on log write. */ #define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int) +/* By default, a device gets one vhost_worker that its virtqueues share. This + * command allows the owner of the device to create an additional vhost_worker + * for the device. It can later be bound to 1 or more of its virtqueues using + * the VHOST_ATTACH_VRING_WORKER command. + * + * This must be called after VHOST_SET_OWNER and the caller must be the owner + * of the device. The new thread will inherit caller's cgroups and namespaces, + * and will share the caller's memory space. The new thread will also be + * counted against the caller's RLIMIT_NPROC value. + * + * The worker's ID used in other commands will be returned in + * vhost_worker_state. + */ +#define VHOST_NEW_WORKER _IOR(VHOST_VIRTIO, 0x8, struct vhost_worker_state) +/* Free a worker created with VHOST_NEW_WORKER if it's not attached to any + * virtqueue. If userspace is not able to call this for workers its created, + * the kernel will free all the device's workers when the device is closed. + */ +#define VHOST_FREE_WORKER _IOW(VHOST_VIRTIO, 0x9, struct vhost_worker_state) /* Ring setup. */ /* Set number of descriptors in ring. This parameter can not @@ -70,6 +89,18 @@ #define VHOST_VRING_BIG_ENDIAN 1 #define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state) #define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state) +/* Attach a vhost_worker created with VHOST_NEW_WORKER to one of the device's + * virtqueues. + * + * This will replace the virtqueue's existing worker. If the replaced worker + * is no longer attached to any virtqueues, it can be freed with + * VHOST_FREE_WORKER. + */ +#define VHOST_ATTACH_VRING_WORKER _IOW(VHOST_VIRTIO, 0x15, \ + struct vhost_vring_worker) +/* Return the vring worker's ID */ +#define VHOST_GET_VRING_WORKER _IOWR(VHOST_VIRTIO, 0x16, \ + struct vhost_vring_worker) /* The following ioctls use eventfd file descriptors to signal and poll * for events. */ diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h index c5690a8992d8..d3aad12ad1fa 100644 --- a/include/uapi/linux/vhost_types.h +++ b/include/uapi/linux/vhost_types.h @@ -47,6 +47,22 @@ struct vhost_vring_addr { __u64 log_guest_addr; }; +struct vhost_worker_state { + /* + * For VHOST_NEW_WORKER the kernel will return the new vhost_worker id. + * For VHOST_FREE_WORKER this must be set to the id of the vhost_worker + * to free. + */ + unsigned int worker_id; +}; + +struct vhost_vring_worker { + /* vring index */ + unsigned int index; + /* The id of the vhost_worker returned from VHOST_NEW_WORKER */ + unsigned int worker_id; +}; + /* no alignment requirement */ struct vhost_iotlb_msg { __u64 iova; |