summaryrefslogtreecommitdiff
path: root/tools/testing/selftests/vfio/lib
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests/vfio/lib')
-rw-r--r--tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c427
l---------tools/testing/selftests/vfio/lib/drivers/dsa/registers.h1
l---------tools/testing/selftests/vfio/lib/drivers/ioat/hw.h1
-rw-r--r--tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c235
l---------tools/testing/selftests/vfio/lib/drivers/ioat/registers.h1
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio.h35
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/assert.h54
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/iommu.h82
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/iova_allocator.h22
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h125
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_driver.h97
-rw-r--r--tools/testing/selftests/vfio/lib/iommu.c464
-rw-r--r--tools/testing/selftests/vfio/lib/iova_allocator.c93
-rw-r--r--tools/testing/selftests/vfio/lib/libvfio.c103
-rw-r--r--tools/testing/selftests/vfio/lib/libvfio.mk29
-rw-r--r--tools/testing/selftests/vfio/lib/vfio_pci_device.c401
-rw-r--r--tools/testing/selftests/vfio/lib/vfio_pci_driver.c112
17 files changed, 2282 insertions, 0 deletions
diff --git a/tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c b/tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c
new file mode 100644
index 000000000000..19d9630b24c2
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c
@@ -0,0 +1,427 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdint.h>
+#include <unistd.h>
+
+#include <linux/bits.h>
+#include <linux/errno.h>
+#include <linux/idxd.h>
+#include <linux/io.h>
+#include <linux/pci_ids.h>
+#include <linux/sizes.h>
+
+#include <libvfio.h>
+
+#include "registers.h"
+
+/* Vectors 1+ are available for work queue completion interrupts. */
+#define MSIX_VECTOR 1
+
+struct dsa_state {
+ /* Descriptors for copy and batch operations. */
+ struct dsa_hw_desc batch[32];
+ struct dsa_hw_desc copy[1024];
+
+ /* Completion records for copy and batch operations. */
+ struct dsa_completion_record copy_completion;
+ struct dsa_completion_record batch_completion;
+
+ /* Cached device registers (and derived data) for easy access */
+ union gen_cap_reg gen_cap;
+ union wq_cap_reg wq_cap;
+ union group_cap_reg group_cap;
+ union engine_cap_reg engine_cap;
+ union offsets_reg table_offsets;
+ void *wqcfg_table;
+ void *grpcfg_table;
+ u64 max_batches;
+ u64 max_copies_per_batch;
+
+ /* The number of ongoing memcpy operations. */
+ u64 memcpy_count;
+
+ /* Buffers used by dsa_send_msi() to generate an interrupt */
+ u64 send_msi_src;
+ u64 send_msi_dst;
+};
+
+static inline struct dsa_state *to_dsa_state(struct vfio_pci_device *device)
+{
+ return device->driver.region.vaddr;
+}
+
+static bool dsa_int_handle_request_required(struct vfio_pci_device *device)
+{
+ void *bar0 = device->bars[0].vaddr;
+ union gen_cap_reg gen_cap;
+ u32 cmd_cap;
+
+ gen_cap.bits = readq(bar0 + IDXD_GENCAP_OFFSET);
+ if (!gen_cap.cmd_cap)
+ return false;
+
+ cmd_cap = readl(bar0 + IDXD_CMDCAP_OFFSET);
+ return (cmd_cap >> IDXD_CMD_REQUEST_INT_HANDLE) & 1;
+}
+
+static int dsa_probe(struct vfio_pci_device *device)
+{
+ const u16 vendor_id = vfio_pci_config_readw(device, PCI_VENDOR_ID);
+ const u16 device_id = vfio_pci_config_readw(device, PCI_DEVICE_ID);
+
+ if (vendor_id != PCI_VENDOR_ID_INTEL)
+ return -EINVAL;
+
+ switch (device_id) {
+ case PCI_DEVICE_ID_INTEL_DSA_SPR0:
+ case PCI_DEVICE_ID_INTEL_DSA_DMR:
+ case PCI_DEVICE_ID_INTEL_DSA_GNRD:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (dsa_int_handle_request_required(device)) {
+ dev_err(device, "Device requires requesting interrupt handles\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void dsa_check_sw_err(struct vfio_pci_device *device)
+{
+ void *reg = device->bars[0].vaddr + IDXD_SWERR_OFFSET;
+ union sw_err_reg err = {};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(err.bits); i++) {
+ err.bits[i] = readq(reg + offsetof(union sw_err_reg, bits[i]));
+
+ /* No errors */
+ if (i == 0 && !err.valid)
+ return;
+ }
+
+ dev_err(device, "SWERR: 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n",
+ err.bits[0], err.bits[1], err.bits[2], err.bits[3]);
+
+ dev_err(device, " valid: 0x%x\n", err.valid);
+ dev_err(device, " overflow: 0x%x\n", err.overflow);
+ dev_err(device, " desc_valid: 0x%x\n", err.desc_valid);
+ dev_err(device, " wq_idx_valid: 0x%x\n", err.wq_idx_valid);
+ dev_err(device, " batch: 0x%x\n", err.batch);
+ dev_err(device, " fault_rw: 0x%x\n", err.fault_rw);
+ dev_err(device, " priv: 0x%x\n", err.priv);
+ dev_err(device, " error: 0x%x\n", err.error);
+ dev_err(device, " wq_idx: 0x%x\n", err.wq_idx);
+ dev_err(device, " operation: 0x%x\n", err.operation);
+ dev_err(device, " pasid: 0x%x\n", err.pasid);
+ dev_err(device, " batch_idx: 0x%x\n", err.batch_idx);
+ dev_err(device, " invalid_flags: 0x%x\n", err.invalid_flags);
+ dev_err(device, " fault_addr: 0x%lx\n", err.fault_addr);
+
+ VFIO_FAIL("Software Error Detected!\n");
+}
+
+static void dsa_command(struct vfio_pci_device *device, u32 cmd)
+{
+ union idxd_command_reg cmd_reg = { .cmd = cmd };
+ u32 sleep_ms = 1, attempts = 5000 / sleep_ms;
+ void *bar0 = device->bars[0].vaddr;
+ u32 status;
+ u8 err;
+
+ writel(cmd_reg.bits, bar0 + IDXD_CMD_OFFSET);
+
+ for (;;) {
+ dsa_check_sw_err(device);
+
+ status = readl(bar0 + IDXD_CMDSTS_OFFSET);
+ if (!(status & IDXD_CMDSTS_ACTIVE))
+ break;
+
+ VFIO_ASSERT_GT(--attempts, 0);
+ usleep(sleep_ms * 1000);
+ }
+
+ err = status & IDXD_CMDSTS_ERR_MASK;
+ VFIO_ASSERT_EQ(err, 0, "Error issuing command 0x%x: 0x%x\n", cmd, err);
+}
+
+static void dsa_wq_init(struct vfio_pci_device *device)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+ union wq_cap_reg wq_cap = dsa->wq_cap;
+ union wqcfg wqcfg;
+ u64 wqcfg_size;
+ int i;
+
+ VFIO_ASSERT_GT((u32)wq_cap.num_wqs, 0);
+
+ wqcfg = (union wqcfg) {
+ .wq_size = wq_cap.total_wq_size,
+ .mode = 1,
+ .priority = 1,
+ /*
+ * Disable Address Translation Service (if enabled) so that VFIO
+ * selftests using this driver can generate I/O page faults.
+ */
+ .wq_ats_disable = wq_cap.wq_ats_support,
+ .max_xfer_shift = dsa->gen_cap.max_xfer_shift,
+ .max_batch_shift = dsa->gen_cap.max_batch_shift,
+ .op_config[0] = BIT(DSA_OPCODE_MEMMOVE) | BIT(DSA_OPCODE_BATCH),
+ };
+
+ wqcfg_size = 1UL << (wq_cap.wqcfg_size + IDXD_WQCFG_MIN);
+
+ for (i = 0; i < wqcfg_size / sizeof(wqcfg.bits[0]); i++)
+ writel(wqcfg.bits[i], dsa->wqcfg_table + offsetof(union wqcfg, bits[i]));
+}
+
+static void dsa_group_init(struct vfio_pci_device *device)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+ union group_cap_reg group_cap = dsa->group_cap;
+ union engine_cap_reg engine_cap = dsa->engine_cap;
+
+ VFIO_ASSERT_GT((u32)group_cap.num_groups, 0);
+ VFIO_ASSERT_GT((u32)engine_cap.num_engines, 0);
+
+ /* Assign work queue 0 and engine 0 to group 0 */
+ writeq(1, dsa->grpcfg_table + offsetof(struct grpcfg, wqs[0]));
+ writeq(1, dsa->grpcfg_table + offsetof(struct grpcfg, engines));
+}
+
+static void dsa_register_cache_init(struct vfio_pci_device *device)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+ void *bar0 = device->bars[0].vaddr;
+
+ dsa->gen_cap.bits = readq(bar0 + IDXD_GENCAP_OFFSET);
+ dsa->wq_cap.bits = readq(bar0 + IDXD_WQCAP_OFFSET);
+ dsa->group_cap.bits = readq(bar0 + IDXD_GRPCAP_OFFSET);
+ dsa->engine_cap.bits = readq(bar0 + IDXD_ENGCAP_OFFSET);
+
+ dsa->table_offsets.bits[0] = readq(bar0 + IDXD_TABLE_OFFSET);
+ dsa->table_offsets.bits[1] = readq(bar0 + IDXD_TABLE_OFFSET + 8);
+
+ dsa->wqcfg_table = bar0 + dsa->table_offsets.wqcfg * IDXD_TABLE_MULT;
+ dsa->grpcfg_table = bar0 + dsa->table_offsets.grpcfg * IDXD_TABLE_MULT;
+
+ dsa->max_batches = 1U << (dsa->wq_cap.total_wq_size + IDXD_WQCFG_MIN);
+ dsa->max_batches = min(dsa->max_batches, ARRAY_SIZE(dsa->batch));
+
+ dsa->max_copies_per_batch = 1UL << dsa->gen_cap.max_batch_shift;
+ dsa->max_copies_per_batch = min(dsa->max_copies_per_batch, ARRAY_SIZE(dsa->copy));
+}
+
+static void dsa_init(struct vfio_pci_device *device)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+
+ VFIO_ASSERT_GE(device->driver.region.size, sizeof(*dsa));
+
+ vfio_pci_config_writew(device, PCI_COMMAND,
+ PCI_COMMAND_MEMORY |
+ PCI_COMMAND_MASTER |
+ PCI_COMMAND_INTX_DISABLE);
+
+ dsa_command(device, IDXD_CMD_RESET_DEVICE);
+
+ dsa_register_cache_init(device);
+ dsa_wq_init(device);
+ dsa_group_init(device);
+
+ dsa_command(device, IDXD_CMD_ENABLE_DEVICE);
+ dsa_command(device, IDXD_CMD_ENABLE_WQ);
+
+ vfio_pci_msix_enable(device, MSIX_VECTOR, 1);
+
+ device->driver.max_memcpy_count =
+ dsa->max_batches * dsa->max_copies_per_batch;
+ device->driver.max_memcpy_size = 1UL << dsa->gen_cap.max_xfer_shift;
+ device->driver.msi = MSIX_VECTOR;
+}
+
+static void dsa_remove(struct vfio_pci_device *device)
+{
+ dsa_command(device, IDXD_CMD_RESET_DEVICE);
+ vfio_pci_msix_disable(device);
+}
+
+static int dsa_completion_wait(struct vfio_pci_device *device,
+ struct dsa_completion_record *completion)
+{
+ u8 status;
+
+ for (;;) {
+ dsa_check_sw_err(device);
+
+ status = READ_ONCE(completion->status);
+ if (status)
+ break;
+
+ usleep(1000);
+ }
+
+ if (status == DSA_COMP_SUCCESS)
+ return 0;
+
+ dev_err(device, "Error detected during memcpy operation: 0x%x\n", status);
+ return -1;
+}
+
+static void dsa_copy_desc_init(struct vfio_pci_device *device,
+ struct dsa_hw_desc *desc,
+ iova_t src, iova_t dst, u64 size,
+ bool interrupt)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+ u16 flags;
+
+ flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR;
+
+ if (interrupt)
+ flags |= IDXD_OP_FLAG_RCI;
+
+ *desc = (struct dsa_hw_desc) {
+ .opcode = DSA_OPCODE_MEMMOVE,
+ .flags = flags,
+ .priv = 1,
+ .src_addr = src,
+ .dst_addr = dst,
+ .xfer_size = size,
+ .completion_addr = to_iova(device, &dsa->copy_completion),
+ .int_handle = interrupt ? MSIX_VECTOR : 0,
+ };
+}
+
+static void dsa_batch_desc_init(struct vfio_pci_device *device,
+ struct dsa_hw_desc *desc,
+ u64 count)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+
+ *desc = (struct dsa_hw_desc) {
+ .opcode = DSA_OPCODE_BATCH,
+ .flags = IDXD_OP_FLAG_CRAV,
+ .priv = 1,
+ .completion_addr = to_iova(device, &dsa->batch_completion),
+ .desc_list_addr = to_iova(device, &dsa->copy[0]),
+ .desc_count = count,
+ };
+}
+
+static void dsa_desc_write(struct vfio_pci_device *device, struct dsa_hw_desc *desc)
+{
+ /* Write the contents (not address) of the 64-byte descriptor to the device. */
+ iosubmit_cmds512(device->bars[2].vaddr, desc, 1);
+}
+
+static void dsa_memcpy_one(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size, bool interrupt)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+
+ memset(&dsa->copy_completion, 0, sizeof(dsa->copy_completion));
+
+ dsa_copy_desc_init(device, &dsa->copy[0], src, dst, size, interrupt);
+ dsa_desc_write(device, &dsa->copy[0]);
+}
+
+static void dsa_memcpy_batch(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size, u64 count)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+ int i;
+
+ memset(&dsa->batch_completion, 0, sizeof(dsa->batch_completion));
+
+ for (i = 0; i < ARRAY_SIZE(dsa->copy); i++) {
+ struct dsa_hw_desc *copy_desc = &dsa->copy[i];
+
+ dsa_copy_desc_init(device, copy_desc, src, dst, size, false);
+
+ /* Don't request completions for individual copies. */
+ copy_desc->flags &= ~IDXD_OP_FLAG_RCR;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(dsa->batch) && count; i++) {
+ struct dsa_hw_desc *batch_desc = &dsa->batch[i];
+ int nr_copies;
+
+ nr_copies = min(count, dsa->max_copies_per_batch);
+ count -= nr_copies;
+
+ /*
+ * Batches must have at least 2 copies, so handle the case where
+ * there is exactly 1 copy left by doing one less copy in this
+ * batch and then 2 in the next.
+ */
+ if (count == 1) {
+ nr_copies--;
+ count++;
+ }
+
+ dsa_batch_desc_init(device, batch_desc, nr_copies);
+
+ /* Request a completion for the last batch. */
+ if (!count)
+ batch_desc->flags |= IDXD_OP_FLAG_RCR;
+
+ dsa_desc_write(device, batch_desc);
+ }
+
+ VFIO_ASSERT_EQ(count, 0, "Failed to start %lu copies.\n", count);
+}
+
+static void dsa_memcpy_start(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size, u64 count)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+
+ /* DSA devices require at least 2 copies per batch. */
+ if (count == 1)
+ dsa_memcpy_one(device, src, dst, size, false);
+ else
+ dsa_memcpy_batch(device, src, dst, size, count);
+
+ dsa->memcpy_count = count;
+}
+
+static int dsa_memcpy_wait(struct vfio_pci_device *device)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+ int r;
+
+ if (dsa->memcpy_count == 1)
+ r = dsa_completion_wait(device, &dsa->copy_completion);
+ else
+ r = dsa_completion_wait(device, &dsa->batch_completion);
+
+ dsa->memcpy_count = 0;
+
+ return r;
+}
+
+static void dsa_send_msi(struct vfio_pci_device *device)
+{
+ struct dsa_state *dsa = to_dsa_state(device);
+
+ dsa_memcpy_one(device,
+ to_iova(device, &dsa->send_msi_src),
+ to_iova(device, &dsa->send_msi_dst),
+ sizeof(dsa->send_msi_src), true);
+
+ VFIO_ASSERT_EQ(dsa_completion_wait(device, &dsa->copy_completion), 0);
+}
+
+const struct vfio_pci_driver_ops dsa_ops = {
+ .name = "dsa",
+ .probe = dsa_probe,
+ .init = dsa_init,
+ .remove = dsa_remove,
+ .memcpy_start = dsa_memcpy_start,
+ .memcpy_wait = dsa_memcpy_wait,
+ .send_msi = dsa_send_msi,
+};
diff --git a/tools/testing/selftests/vfio/lib/drivers/dsa/registers.h b/tools/testing/selftests/vfio/lib/drivers/dsa/registers.h
new file mode 120000
index 000000000000..bde657c3c2af
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/drivers/dsa/registers.h
@@ -0,0 +1 @@
+../../../../../../../drivers/dma/idxd/registers.h \ No newline at end of file
diff --git a/tools/testing/selftests/vfio/lib/drivers/ioat/hw.h b/tools/testing/selftests/vfio/lib/drivers/ioat/hw.h
new file mode 120000
index 000000000000..8ab52ddd4458
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/drivers/ioat/hw.h
@@ -0,0 +1 @@
+../../../../../../../drivers/dma/ioat/hw.h \ No newline at end of file
diff --git a/tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c b/tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c
new file mode 100644
index 000000000000..a871b935542b
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdint.h>
+#include <unistd.h>
+
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/pci_ids.h>
+#include <linux/sizes.h>
+
+#include <libvfio.h>
+
+#include "hw.h"
+#include "registers.h"
+
+#define IOAT_DMACOUNT_MAX UINT16_MAX
+
+struct ioat_state {
+ /* Single descriptor used to issue DMA memcpy operations */
+ struct ioat_dma_descriptor desc;
+
+ /* Copy buffers used by ioat_send_msi() to generate an interrupt. */
+ u64 send_msi_src;
+ u64 send_msi_dst;
+};
+
+static inline struct ioat_state *to_ioat_state(struct vfio_pci_device *device)
+{
+ return device->driver.region.vaddr;
+}
+
+static inline void *ioat_channel_registers(struct vfio_pci_device *device)
+{
+ return device->bars[0].vaddr + IOAT_CHANNEL_MMIO_SIZE;
+}
+
+static int ioat_probe(struct vfio_pci_device *device)
+{
+ u8 version;
+ int r;
+
+ if (!vfio_pci_device_match(device, PCI_VENDOR_ID_INTEL,
+ PCI_DEVICE_ID_INTEL_IOAT_SKX))
+ return -EINVAL;
+
+ VFIO_ASSERT_NOT_NULL(device->bars[0].vaddr);
+
+ version = readb(device->bars[0].vaddr + IOAT_VER_OFFSET);
+ switch (version) {
+ case IOAT_VER_3_2:
+ case IOAT_VER_3_3:
+ r = 0;
+ break;
+ default:
+ dev_err(device, "ioat: Unsupported version: 0x%x\n", version);
+ r = -EINVAL;
+ }
+ return r;
+}
+
+static u64 ioat_channel_status(void *bar)
+{
+ return readq(bar + IOAT_CHANSTS_OFFSET) & IOAT_CHANSTS_STATUS;
+}
+
+static void ioat_clear_errors(struct vfio_pci_device *device)
+{
+ void *registers = ioat_channel_registers(device);
+ u32 errors;
+
+ errors = vfio_pci_config_readl(device, IOAT_PCI_CHANERR_INT_OFFSET);
+ vfio_pci_config_writel(device, IOAT_PCI_CHANERR_INT_OFFSET, errors);
+
+ errors = vfio_pci_config_readl(device, IOAT_PCI_DMAUNCERRSTS_OFFSET);
+ vfio_pci_config_writel(device, IOAT_PCI_CHANERR_INT_OFFSET, errors);
+
+ errors = readl(registers + IOAT_CHANERR_OFFSET);
+ writel(errors, registers + IOAT_CHANERR_OFFSET);
+}
+
+static void ioat_reset(struct vfio_pci_device *device)
+{
+ void *registers = ioat_channel_registers(device);
+ u32 sleep_ms = 1, attempts = 5000 / sleep_ms;
+ u8 chancmd;
+
+ ioat_clear_errors(device);
+
+ writeb(IOAT_CHANCMD_RESET, registers + IOAT2_CHANCMD_OFFSET);
+
+ for (;;) {
+ chancmd = readb(registers + IOAT2_CHANCMD_OFFSET);
+ if (!(chancmd & IOAT_CHANCMD_RESET))
+ break;
+
+ VFIO_ASSERT_GT(--attempts, 0);
+ usleep(sleep_ms * 1000);
+ }
+
+ VFIO_ASSERT_EQ(ioat_channel_status(registers), IOAT_CHANSTS_HALTED);
+}
+
+static void ioat_init(struct vfio_pci_device *device)
+{
+ struct ioat_state *ioat = to_ioat_state(device);
+ u8 intrctrl;
+
+ VFIO_ASSERT_GE(device->driver.region.size, sizeof(*ioat));
+
+ vfio_pci_config_writew(device, PCI_COMMAND,
+ PCI_COMMAND_MEMORY |
+ PCI_COMMAND_MASTER |
+ PCI_COMMAND_INTX_DISABLE);
+
+ ioat_reset(device);
+
+ /* Enable the use of MXI-x interrupts for channel interrupts. */
+ intrctrl = IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
+ writeb(intrctrl, device->bars[0].vaddr + IOAT_INTRCTRL_OFFSET);
+
+ vfio_pci_msix_enable(device, 0, device->msix_info.count);
+
+ device->driver.msi = 0;
+ device->driver.max_memcpy_size =
+ 1UL << readb(device->bars[0].vaddr + IOAT_XFERCAP_OFFSET);
+ device->driver.max_memcpy_count = IOAT_DMACOUNT_MAX;
+}
+
+static void ioat_remove(struct vfio_pci_device *device)
+{
+ ioat_reset(device);
+ vfio_pci_msix_disable(device);
+}
+
+static void ioat_handle_error(struct vfio_pci_device *device)
+{
+ void *registers = ioat_channel_registers(device);
+
+ dev_err(device, "Error detected during memcpy operation!\n"
+ " CHANERR: 0x%x\n"
+ " CHANERR_INT: 0x%x\n"
+ " DMAUNCERRSTS: 0x%x\n",
+ readl(registers + IOAT_CHANERR_OFFSET),
+ vfio_pci_config_readl(device, IOAT_PCI_CHANERR_INT_OFFSET),
+ vfio_pci_config_readl(device, IOAT_PCI_DMAUNCERRSTS_OFFSET));
+
+ ioat_reset(device);
+}
+
+static int ioat_memcpy_wait(struct vfio_pci_device *device)
+{
+ void *registers = ioat_channel_registers(device);
+ u64 status;
+ int r = 0;
+
+ /* Wait until all operations complete. */
+ for (;;) {
+ status = ioat_channel_status(registers);
+ if (status == IOAT_CHANSTS_DONE)
+ break;
+
+ if (status == IOAT_CHANSTS_HALTED) {
+ ioat_handle_error(device);
+ return -1;
+ }
+ }
+
+ /* Put the channel into the SUSPENDED state. */
+ writeb(IOAT_CHANCMD_SUSPEND, registers + IOAT2_CHANCMD_OFFSET);
+ for (;;) {
+ status = ioat_channel_status(registers);
+ if (status == IOAT_CHANSTS_SUSPENDED)
+ break;
+ }
+
+ return r;
+}
+
+static void __ioat_memcpy_start(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size,
+ u16 count, bool interrupt)
+{
+ void *registers = ioat_channel_registers(device);
+ struct ioat_state *ioat = to_ioat_state(device);
+ u64 desc_iova;
+ u16 chanctrl;
+
+ desc_iova = to_iova(device, &ioat->desc);
+ ioat->desc = (struct ioat_dma_descriptor) {
+ .ctl_f.op = IOAT_OP_COPY,
+ .ctl_f.int_en = interrupt,
+ .src_addr = src,
+ .dst_addr = dst,
+ .size = size,
+ .next = desc_iova,
+ };
+
+ /* Tell the device the address of the descriptor. */
+ writeq(desc_iova, registers + IOAT2_CHAINADDR_OFFSET);
+
+ /* (Re)Enable the channel interrupt and abort on any errors */
+ chanctrl = IOAT_CHANCTRL_INT_REARM | IOAT_CHANCTRL_ANY_ERR_ABORT_EN;
+ writew(chanctrl, registers + IOAT_CHANCTRL_OFFSET);
+
+ /* Kick off @count DMA copy operation(s). */
+ writew(count, registers + IOAT_CHAN_DMACOUNT_OFFSET);
+}
+
+static void ioat_memcpy_start(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size,
+ u64 count)
+{
+ __ioat_memcpy_start(device, src, dst, size, count, false);
+}
+
+static void ioat_send_msi(struct vfio_pci_device *device)
+{
+ struct ioat_state *ioat = to_ioat_state(device);
+
+ __ioat_memcpy_start(device,
+ to_iova(device, &ioat->send_msi_src),
+ to_iova(device, &ioat->send_msi_dst),
+ sizeof(ioat->send_msi_src), 1, true);
+
+ VFIO_ASSERT_EQ(ioat_memcpy_wait(device), 0);
+}
+
+const struct vfio_pci_driver_ops ioat_ops = {
+ .name = "ioat",
+ .probe = ioat_probe,
+ .init = ioat_init,
+ .remove = ioat_remove,
+ .memcpy_start = ioat_memcpy_start,
+ .memcpy_wait = ioat_memcpy_wait,
+ .send_msi = ioat_send_msi,
+};
diff --git a/tools/testing/selftests/vfio/lib/drivers/ioat/registers.h b/tools/testing/selftests/vfio/lib/drivers/ioat/registers.h
new file mode 120000
index 000000000000..0b809cfd8fe6
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/drivers/ioat/registers.h
@@ -0,0 +1 @@
+../../../../../../../drivers/dma/ioat/registers.h \ No newline at end of file
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio.h b/tools/testing/selftests/vfio/lib/include/libvfio.h
new file mode 100644
index 000000000000..1b6da54cc2cb
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/include/libvfio.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_H
+#define SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_H
+
+#include <libvfio/assert.h>
+#include <libvfio/iommu.h>
+#include <libvfio/iova_allocator.h>
+#include <libvfio/vfio_pci_device.h>
+#include <libvfio/vfio_pci_driver.h>
+
+/*
+ * Return the BDF string of the device that the test should use.
+ *
+ * If a BDF string is provided by the user on the command line (as the last
+ * element of argv[]), then this function will return that and decrement argc
+ * by 1.
+ *
+ * Otherwise this function will attempt to use the environment variable
+ * $VFIO_SELFTESTS_BDF.
+ *
+ * If BDF cannot be determined then the test will exit with KSFT_SKIP.
+ */
+const char *vfio_selftests_get_bdf(int *argc, char *argv[]);
+char **vfio_selftests_get_bdfs(int *argc, char *argv[], int *nr_bdfs);
+
+/*
+ * Reserve virtual address space of size at an address satisfying
+ * (vaddr % align) == offset.
+ *
+ * Returns the reserved vaddr. The caller is responsible for unmapping
+ * the returned region.
+ */
+void *mmap_reserve(size_t size, size_t align, size_t offset);
+
+#endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_H */
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio/assert.h b/tools/testing/selftests/vfio/lib/include/libvfio/assert.h
new file mode 100644
index 000000000000..f4ebd122d9b6
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/include/libvfio/assert.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_ASSERT_H
+#define SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_ASSERT_H
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "../../../../kselftest.h"
+
+#define VFIO_LOG_AND_EXIT(...) do { \
+ fprintf(stderr, " " __VA_ARGS__); \
+ fprintf(stderr, "\n"); \
+ exit(KSFT_FAIL); \
+} while (0)
+
+#define VFIO_ASSERT_OP(_lhs, _rhs, _op, ...) do { \
+ typeof(_lhs) __lhs = (_lhs); \
+ typeof(_rhs) __rhs = (_rhs); \
+ \
+ if (__lhs _op __rhs) \
+ break; \
+ \
+ fprintf(stderr, "%s:%u: Assertion Failure\n\n", __FILE__, __LINE__); \
+ fprintf(stderr, " Expression: " #_lhs " " #_op " " #_rhs "\n"); \
+ fprintf(stderr, " Observed: %#lx %s %#lx\n", \
+ (u64)__lhs, #_op, (u64)__rhs); \
+ fprintf(stderr, " [errno: %d - %s]\n", errno, strerror(errno)); \
+ VFIO_LOG_AND_EXIT(__VA_ARGS__); \
+} while (0)
+
+#define VFIO_ASSERT_EQ(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, ==, ##__VA_ARGS__)
+#define VFIO_ASSERT_NE(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, !=, ##__VA_ARGS__)
+#define VFIO_ASSERT_LT(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, <, ##__VA_ARGS__)
+#define VFIO_ASSERT_LE(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, <=, ##__VA_ARGS__)
+#define VFIO_ASSERT_GT(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, >, ##__VA_ARGS__)
+#define VFIO_ASSERT_GE(_a, _b, ...) VFIO_ASSERT_OP(_a, _b, >=, ##__VA_ARGS__)
+#define VFIO_ASSERT_TRUE(_a, ...) VFIO_ASSERT_NE(false, (_a), ##__VA_ARGS__)
+#define VFIO_ASSERT_FALSE(_a, ...) VFIO_ASSERT_EQ(false, (_a), ##__VA_ARGS__)
+#define VFIO_ASSERT_NULL(_a, ...) VFIO_ASSERT_EQ(NULL, _a, ##__VA_ARGS__)
+#define VFIO_ASSERT_NOT_NULL(_a, ...) VFIO_ASSERT_NE(NULL, _a, ##__VA_ARGS__)
+
+#define VFIO_FAIL(_fmt, ...) do { \
+ fprintf(stderr, "%s:%u: FAIL\n\n", __FILE__, __LINE__); \
+ VFIO_LOG_AND_EXIT(_fmt, ##__VA_ARGS__); \
+} while (0)
+
+#define ioctl_assert(_fd, _op, _arg) do { \
+ void *__arg = (_arg); \
+ int __ret = ioctl((_fd), (_op), (__arg)); \
+ VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \
+} while (0)
+
+#endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_ASSERT_H */
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h b/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h
new file mode 100644
index 000000000000..e9a3386a4719
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/include/libvfio/iommu.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_IOMMU_H
+#define SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_IOMMU_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+
+#include <libvfio/assert.h>
+
+typedef u64 iova_t;
+
+struct iommu_mode {
+ const char *name;
+ const char *container_path;
+ unsigned long iommu_type;
+};
+
+extern const char *default_iommu_mode;
+
+struct dma_region {
+ struct list_head link;
+ void *vaddr;
+ iova_t iova;
+ u64 size;
+};
+
+struct iommu {
+ const struct iommu_mode *mode;
+ int container_fd;
+ int iommufd;
+ u32 ioas_id;
+ struct list_head dma_regions;
+};
+
+struct iommu *iommu_init(const char *iommu_mode);
+void iommu_cleanup(struct iommu *iommu);
+
+int __iommu_map(struct iommu *iommu, struct dma_region *region);
+
+static inline void iommu_map(struct iommu *iommu, struct dma_region *region)
+{
+ VFIO_ASSERT_EQ(__iommu_map(iommu, region), 0);
+}
+
+int __iommu_unmap(struct iommu *iommu, struct dma_region *region, u64 *unmapped);
+
+static inline void iommu_unmap(struct iommu *iommu, struct dma_region *region)
+{
+ VFIO_ASSERT_EQ(__iommu_unmap(iommu, region, NULL), 0);
+}
+
+int __iommu_unmap_all(struct iommu *iommu, u64 *unmapped);
+
+static inline void iommu_unmap_all(struct iommu *iommu)
+{
+ VFIO_ASSERT_EQ(__iommu_unmap_all(iommu, NULL), 0);
+}
+
+int __iommu_hva2iova(struct iommu *iommu, void *vaddr, iova_t *iova);
+iova_t iommu_hva2iova(struct iommu *iommu, void *vaddr);
+
+struct iommu_iova_range *iommu_iova_ranges(struct iommu *iommu, u32 *nranges);
+
+#define MODE_VFIO_TYPE1_IOMMU "vfio_type1_iommu"
+#define MODE_VFIO_TYPE1V2_IOMMU "vfio_type1v2_iommu"
+#define MODE_IOMMUFD_COMPAT_TYPE1 "iommufd_compat_type1"
+#define MODE_IOMMUFD_COMPAT_TYPE1V2 "iommufd_compat_type1v2"
+#define MODE_IOMMUFD "iommufd"
+
+/*
+ * Generator for VFIO selftests fixture variants that replicate across all
+ * possible IOMMU modes. Tests must define FIXTURE_VARIANT_ADD_IOMMU_MODE()
+ * which should then use FIXTURE_VARIANT_ADD() to create the variant.
+ */
+#define FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(...) \
+FIXTURE_VARIANT_ADD_IOMMU_MODE(vfio_type1_iommu, ##__VA_ARGS__); \
+FIXTURE_VARIANT_ADD_IOMMU_MODE(vfio_type1v2_iommu, ##__VA_ARGS__); \
+FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd_compat_type1, ##__VA_ARGS__); \
+FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd_compat_type1v2, ##__VA_ARGS__); \
+FIXTURE_VARIANT_ADD_IOMMU_MODE(iommufd, ##__VA_ARGS__)
+
+#endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_IOMMU_H */
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio/iova_allocator.h b/tools/testing/selftests/vfio/lib/include/libvfio/iova_allocator.h
new file mode 100644
index 000000000000..c7c0796a757f
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/include/libvfio/iova_allocator.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_IOVA_ALLOCATOR_H
+#define SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_IOVA_ALLOCATOR_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/iommufd.h>
+
+#include <libvfio/iommu.h>
+
+struct iova_allocator {
+ struct iommu_iova_range *ranges;
+ u32 nranges;
+ u32 range_idx;
+ u64 range_offset;
+};
+
+struct iova_allocator *iova_allocator_init(struct iommu *iommu);
+void iova_allocator_cleanup(struct iova_allocator *allocator);
+iova_t iova_allocator_alloc(struct iova_allocator *allocator, size_t size);
+
+#endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_IOVA_ALLOCATOR_H */
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h b/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h
new file mode 100644
index 000000000000..2858885a89bb
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_VFIO_PCI_DEVICE_H
+#define SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_VFIO_PCI_DEVICE_H
+
+#include <fcntl.h>
+#include <linux/vfio.h>
+#include <linux/pci_regs.h>
+
+#include <libvfio/assert.h>
+#include <libvfio/iommu.h>
+#include <libvfio/vfio_pci_driver.h>
+
+struct vfio_pci_bar {
+ struct vfio_region_info info;
+ void *vaddr;
+};
+
+struct vfio_pci_device {
+ const char *bdf;
+ int fd;
+ int group_fd;
+
+ struct iommu *iommu;
+
+ struct vfio_device_info info;
+ struct vfio_region_info config_space;
+ struct vfio_pci_bar bars[PCI_STD_NUM_BARS];
+
+ struct vfio_irq_info msi_info;
+ struct vfio_irq_info msix_info;
+
+ /* eventfds for MSI and MSI-x interrupts */
+ int msi_eventfds[PCI_MSIX_FLAGS_QSIZE + 1];
+
+ struct vfio_pci_driver driver;
+};
+
+#define dev_info(_dev, _fmt, ...) printf("%s: " _fmt, (_dev)->bdf, ##__VA_ARGS__)
+#define dev_err(_dev, _fmt, ...) fprintf(stderr, "%s: " _fmt, (_dev)->bdf, ##__VA_ARGS__)
+
+struct vfio_pci_device *vfio_pci_device_init(const char *bdf, struct iommu *iommu);
+void vfio_pci_device_cleanup(struct vfio_pci_device *device);
+
+void vfio_pci_device_reset(struct vfio_pci_device *device);
+
+void vfio_pci_config_access(struct vfio_pci_device *device, bool write,
+ size_t config, size_t size, void *data);
+
+#define vfio_pci_config_read(_device, _offset, _type) ({ \
+ _type __data; \
+ vfio_pci_config_access((_device), false, _offset, sizeof(__data), &__data); \
+ __data; \
+})
+
+#define vfio_pci_config_readb(_d, _o) vfio_pci_config_read(_d, _o, u8)
+#define vfio_pci_config_readw(_d, _o) vfio_pci_config_read(_d, _o, u16)
+#define vfio_pci_config_readl(_d, _o) vfio_pci_config_read(_d, _o, u32)
+
+#define vfio_pci_config_write(_device, _offset, _value, _type) do { \
+ _type __data = (_value); \
+ vfio_pci_config_access((_device), true, _offset, sizeof(_type), &__data); \
+} while (0)
+
+#define vfio_pci_config_writeb(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u8)
+#define vfio_pci_config_writew(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u16)
+#define vfio_pci_config_writel(_d, _o, _v) vfio_pci_config_write(_d, _o, _v, u32)
+
+void vfio_pci_irq_enable(struct vfio_pci_device *device, u32 index,
+ u32 vector, int count);
+void vfio_pci_irq_disable(struct vfio_pci_device *device, u32 index);
+void vfio_pci_irq_trigger(struct vfio_pci_device *device, u32 index, u32 vector);
+
+static inline void fcntl_set_nonblock(int fd)
+{
+ int r;
+
+ r = fcntl(fd, F_GETFL, 0);
+ VFIO_ASSERT_NE(r, -1, "F_GETFL failed for fd %d\n", fd);
+
+ r = fcntl(fd, F_SETFL, r | O_NONBLOCK);
+ VFIO_ASSERT_NE(r, -1, "F_SETFL O_NONBLOCK failed for fd %d\n", fd);
+}
+
+static inline void vfio_pci_msi_enable(struct vfio_pci_device *device,
+ u32 vector, int count)
+{
+ vfio_pci_irq_enable(device, VFIO_PCI_MSI_IRQ_INDEX, vector, count);
+}
+
+static inline void vfio_pci_msi_disable(struct vfio_pci_device *device)
+{
+ vfio_pci_irq_disable(device, VFIO_PCI_MSI_IRQ_INDEX);
+}
+
+static inline void vfio_pci_msix_enable(struct vfio_pci_device *device,
+ u32 vector, int count)
+{
+ vfio_pci_irq_enable(device, VFIO_PCI_MSIX_IRQ_INDEX, vector, count);
+}
+
+static inline void vfio_pci_msix_disable(struct vfio_pci_device *device)
+{
+ vfio_pci_irq_disable(device, VFIO_PCI_MSIX_IRQ_INDEX);
+}
+
+static inline int __to_iova(struct vfio_pci_device *device, void *vaddr, iova_t *iova)
+{
+ return __iommu_hva2iova(device->iommu, vaddr, iova);
+}
+
+static inline iova_t to_iova(struct vfio_pci_device *device, void *vaddr)
+{
+ return iommu_hva2iova(device->iommu, vaddr);
+}
+
+static inline bool vfio_pci_device_match(struct vfio_pci_device *device,
+ u16 vendor_id, u16 device_id)
+{
+ return (vendor_id == vfio_pci_config_readw(device, PCI_VENDOR_ID)) &&
+ (device_id == vfio_pci_config_readw(device, PCI_DEVICE_ID));
+}
+
+const char *vfio_pci_get_cdev_path(const char *bdf);
+
+#endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_VFIO_PCI_DEVICE_H */
diff --git a/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_driver.h b/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_driver.h
new file mode 100644
index 000000000000..e5ada209b1d1
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_driver.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_VFIO_PCI_DRIVER_H
+#define SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_VFIO_PCI_DRIVER_H
+
+#include <libvfio/iommu.h>
+
+struct vfio_pci_device;
+
+struct vfio_pci_driver_ops {
+ const char *name;
+
+ /**
+ * @probe() - Check if the driver supports the given device.
+ *
+ * Return: 0 on success, non-0 on failure.
+ */
+ int (*probe)(struct vfio_pci_device *device);
+
+ /**
+ * @init() - Initialize the driver for @device.
+ *
+ * Must be called after device->driver.region has been initialized.
+ */
+ void (*init)(struct vfio_pci_device *device);
+
+ /**
+ * remove() - Deinitialize the driver for @device.
+ */
+ void (*remove)(struct vfio_pci_device *device);
+
+ /**
+ * memcpy_start() - Kick off @count repeated memcpy operations from
+ * [@src, @src + @size) to [@dst, @dst + @size).
+ *
+ * Guarantees:
+ * - The device will attempt DMA reads on [src, src + size).
+ * - The device will attempt DMA writes on [dst, dst + size).
+ * - The device will not generate any interrupts.
+ *
+ * memcpy_start() returns immediately, it does not wait for the
+ * copies to complete.
+ */
+ void (*memcpy_start)(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size, u64 count);
+
+ /**
+ * memcpy_wait() - Wait until the memcpy operations started by
+ * memcpy_start() have finished.
+ *
+ * Guarantees:
+ * - All in-flight DMAs initiated by memcpy_start() are fully complete
+ * before memcpy_wait() returns.
+ *
+ * Returns non-0 if the driver detects that an error occurred during the
+ * memcpy, 0 otherwise.
+ */
+ int (*memcpy_wait)(struct vfio_pci_device *device);
+
+ /**
+ * send_msi() - Make the device send the MSI device->driver.msi.
+ *
+ * Guarantees:
+ * - The device will send the MSI once.
+ */
+ void (*send_msi)(struct vfio_pci_device *device);
+};
+
+struct vfio_pci_driver {
+ const struct vfio_pci_driver_ops *ops;
+ bool initialized;
+ bool memcpy_in_progress;
+
+ /* Region to be used by the driver (e.g. for in-memory descriptors) */
+ struct dma_region region;
+
+ /* The maximum size that can be passed to memcpy_start(). */
+ u64 max_memcpy_size;
+
+ /* The maximum count that can be passed to memcpy_start(). */
+ u64 max_memcpy_count;
+
+ /* The MSI vector the device will signal in ops->send_msi(). */
+ int msi;
+};
+
+void vfio_pci_driver_probe(struct vfio_pci_device *device);
+void vfio_pci_driver_init(struct vfio_pci_device *device);
+void vfio_pci_driver_remove(struct vfio_pci_device *device);
+int vfio_pci_driver_memcpy(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size);
+void vfio_pci_driver_memcpy_start(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size,
+ u64 count);
+int vfio_pci_driver_memcpy_wait(struct vfio_pci_device *device);
+void vfio_pci_driver_send_msi(struct vfio_pci_device *device);
+
+#endif /* SELFTESTS_VFIO_LIB_INCLUDE_LIBVFIO_VFIO_PCI_DRIVER_H */
diff --git a/tools/testing/selftests/vfio/lib/iommu.c b/tools/testing/selftests/vfio/lib/iommu.c
new file mode 100644
index 000000000000..035dac069d60
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/iommu.c
@@ -0,0 +1,464 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <dirent.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <linux/limits.h>
+#include <linux/mman.h>
+#include <linux/types.h>
+#include <linux/vfio.h>
+#include <linux/iommufd.h>
+
+#include "../../../kselftest.h"
+#include <libvfio.h>
+
+const char *default_iommu_mode = MODE_IOMMUFD;
+
+/* Reminder: Keep in sync with FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(). */
+static const struct iommu_mode iommu_modes[] = {
+ {
+ .name = MODE_VFIO_TYPE1_IOMMU,
+ .container_path = "/dev/vfio/vfio",
+ .iommu_type = VFIO_TYPE1_IOMMU,
+ },
+ {
+ .name = MODE_VFIO_TYPE1V2_IOMMU,
+ .container_path = "/dev/vfio/vfio",
+ .iommu_type = VFIO_TYPE1v2_IOMMU,
+ },
+ {
+ .name = MODE_IOMMUFD_COMPAT_TYPE1,
+ .container_path = "/dev/iommu",
+ .iommu_type = VFIO_TYPE1_IOMMU,
+ },
+ {
+ .name = MODE_IOMMUFD_COMPAT_TYPE1V2,
+ .container_path = "/dev/iommu",
+ .iommu_type = VFIO_TYPE1v2_IOMMU,
+ },
+ {
+ .name = MODE_IOMMUFD,
+ },
+};
+
+static const struct iommu_mode *lookup_iommu_mode(const char *iommu_mode)
+{
+ int i;
+
+ if (!iommu_mode)
+ iommu_mode = default_iommu_mode;
+
+ for (i = 0; i < ARRAY_SIZE(iommu_modes); i++) {
+ if (strcmp(iommu_mode, iommu_modes[i].name))
+ continue;
+
+ return &iommu_modes[i];
+ }
+
+ VFIO_FAIL("Unrecognized IOMMU mode: %s\n", iommu_mode);
+}
+
+int __iommu_hva2iova(struct iommu *iommu, void *vaddr, iova_t *iova)
+{
+ struct dma_region *region;
+
+ list_for_each_entry(region, &iommu->dma_regions, link) {
+ if (vaddr < region->vaddr)
+ continue;
+
+ if (vaddr >= region->vaddr + region->size)
+ continue;
+
+ if (iova)
+ *iova = region->iova + (vaddr - region->vaddr);
+
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+iova_t iommu_hva2iova(struct iommu *iommu, void *vaddr)
+{
+ iova_t iova;
+ int ret;
+
+ ret = __iommu_hva2iova(iommu, vaddr, &iova);
+ VFIO_ASSERT_EQ(ret, 0, "%p is not mapped into the iommu\n", vaddr);
+
+ return iova;
+}
+
+static int vfio_iommu_map(struct iommu *iommu, struct dma_region *region)
+{
+ struct vfio_iommu_type1_dma_map args = {
+ .argsz = sizeof(args),
+ .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
+ .vaddr = (u64)region->vaddr,
+ .iova = region->iova,
+ .size = region->size,
+ };
+
+ if (ioctl(iommu->container_fd, VFIO_IOMMU_MAP_DMA, &args))
+ return -errno;
+
+ return 0;
+}
+
+static int iommufd_map(struct iommu *iommu, struct dma_region *region)
+{
+ struct iommu_ioas_map args = {
+ .size = sizeof(args),
+ .flags = IOMMU_IOAS_MAP_READABLE |
+ IOMMU_IOAS_MAP_WRITEABLE |
+ IOMMU_IOAS_MAP_FIXED_IOVA,
+ .user_va = (u64)region->vaddr,
+ .iova = region->iova,
+ .length = region->size,
+ .ioas_id = iommu->ioas_id,
+ };
+
+ if (ioctl(iommu->iommufd, IOMMU_IOAS_MAP, &args))
+ return -errno;
+
+ return 0;
+}
+
+int __iommu_map(struct iommu *iommu, struct dma_region *region)
+{
+ int ret;
+
+ if (iommu->iommufd)
+ ret = iommufd_map(iommu, region);
+ else
+ ret = vfio_iommu_map(iommu, region);
+
+ if (ret)
+ return ret;
+
+ list_add(&region->link, &iommu->dma_regions);
+
+ return 0;
+}
+
+static int __vfio_iommu_unmap(int fd, u64 iova, u64 size, u32 flags, u64 *unmapped)
+{
+ struct vfio_iommu_type1_dma_unmap args = {
+ .argsz = sizeof(args),
+ .iova = iova,
+ .size = size,
+ .flags = flags,
+ };
+
+ if (ioctl(fd, VFIO_IOMMU_UNMAP_DMA, &args))
+ return -errno;
+
+ if (unmapped)
+ *unmapped = args.size;
+
+ return 0;
+}
+
+static int vfio_iommu_unmap(struct iommu *iommu, struct dma_region *region,
+ u64 *unmapped)
+{
+ return __vfio_iommu_unmap(iommu->container_fd, region->iova,
+ region->size, 0, unmapped);
+}
+
+static int __iommufd_unmap(int fd, u64 iova, u64 length, u32 ioas_id, u64 *unmapped)
+{
+ struct iommu_ioas_unmap args = {
+ .size = sizeof(args),
+ .iova = iova,
+ .length = length,
+ .ioas_id = ioas_id,
+ };
+
+ if (ioctl(fd, IOMMU_IOAS_UNMAP, &args))
+ return -errno;
+
+ if (unmapped)
+ *unmapped = args.length;
+
+ return 0;
+}
+
+static int iommufd_unmap(struct iommu *iommu, struct dma_region *region,
+ u64 *unmapped)
+{
+ return __iommufd_unmap(iommu->iommufd, region->iova, region->size,
+ iommu->ioas_id, unmapped);
+}
+
+int __iommu_unmap(struct iommu *iommu, struct dma_region *region, u64 *unmapped)
+{
+ int ret;
+
+ if (iommu->iommufd)
+ ret = iommufd_unmap(iommu, region, unmapped);
+ else
+ ret = vfio_iommu_unmap(iommu, region, unmapped);
+
+ if (ret)
+ return ret;
+
+ list_del_init(&region->link);
+
+ return 0;
+}
+
+int __iommu_unmap_all(struct iommu *iommu, u64 *unmapped)
+{
+ int ret;
+ struct dma_region *curr, *next;
+
+ if (iommu->iommufd)
+ ret = __iommufd_unmap(iommu->iommufd, 0, UINT64_MAX,
+ iommu->ioas_id, unmapped);
+ else
+ ret = __vfio_iommu_unmap(iommu->container_fd, 0, 0,
+ VFIO_DMA_UNMAP_FLAG_ALL, unmapped);
+
+ if (ret)
+ return ret;
+
+ list_for_each_entry_safe(curr, next, &iommu->dma_regions, link)
+ list_del_init(&curr->link);
+
+ return 0;
+}
+
+static struct vfio_info_cap_header *next_cap_hdr(void *buf, u32 bufsz,
+ u32 *cap_offset)
+{
+ struct vfio_info_cap_header *hdr;
+
+ if (!*cap_offset)
+ return NULL;
+
+ VFIO_ASSERT_LT(*cap_offset, bufsz);
+ VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr));
+
+ hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset);
+ *cap_offset = hdr->next;
+
+ return hdr;
+}
+
+static struct vfio_info_cap_header *vfio_iommu_info_cap_hdr(struct vfio_iommu_type1_info *info,
+ u16 cap_id)
+{
+ struct vfio_info_cap_header *hdr;
+ u32 cap_offset = info->cap_offset;
+ u32 max_depth;
+ u32 depth = 0;
+
+ if (!(info->flags & VFIO_IOMMU_INFO_CAPS))
+ return NULL;
+
+ if (cap_offset)
+ VFIO_ASSERT_GE(cap_offset, sizeof(*info));
+
+ max_depth = (info->argsz - sizeof(*info)) / sizeof(*hdr);
+
+ while ((hdr = next_cap_hdr(info, info->argsz, &cap_offset))) {
+ depth++;
+ VFIO_ASSERT_LE(depth, max_depth, "Capability chain contains a cycle\n");
+
+ if (hdr->id == cap_id)
+ return hdr;
+ }
+
+ return NULL;
+}
+
+/* Return buffer including capability chain, if present. Free with free() */
+static struct vfio_iommu_type1_info *vfio_iommu_get_info(int container_fd)
+{
+ struct vfio_iommu_type1_info *info;
+
+ info = malloc(sizeof(*info));
+ VFIO_ASSERT_NOT_NULL(info);
+
+ *info = (struct vfio_iommu_type1_info) {
+ .argsz = sizeof(*info),
+ };
+
+ ioctl_assert(container_fd, VFIO_IOMMU_GET_INFO, info);
+ VFIO_ASSERT_GE(info->argsz, sizeof(*info));
+
+ info = realloc(info, info->argsz);
+ VFIO_ASSERT_NOT_NULL(info);
+
+ ioctl_assert(container_fd, VFIO_IOMMU_GET_INFO, info);
+ VFIO_ASSERT_GE(info->argsz, sizeof(*info));
+
+ return info;
+}
+
+/*
+ * Return iova ranges for the device's container. Normalize vfio_iommu_type1 to
+ * report iommufd's iommu_iova_range. Free with free().
+ */
+static struct iommu_iova_range *vfio_iommu_iova_ranges(struct iommu *iommu,
+ u32 *nranges)
+{
+ struct vfio_iommu_type1_info_cap_iova_range *cap_range;
+ struct vfio_iommu_type1_info *info;
+ struct vfio_info_cap_header *hdr;
+ struct iommu_iova_range *ranges = NULL;
+
+ info = vfio_iommu_get_info(iommu->container_fd);
+ hdr = vfio_iommu_info_cap_hdr(info, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
+ VFIO_ASSERT_NOT_NULL(hdr);
+
+ cap_range = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range, header);
+ VFIO_ASSERT_GT(cap_range->nr_iovas, 0);
+
+ ranges = calloc(cap_range->nr_iovas, sizeof(*ranges));
+ VFIO_ASSERT_NOT_NULL(ranges);
+
+ for (u32 i = 0; i < cap_range->nr_iovas; i++) {
+ ranges[i] = (struct iommu_iova_range){
+ .start = cap_range->iova_ranges[i].start,
+ .last = cap_range->iova_ranges[i].end,
+ };
+ }
+
+ *nranges = cap_range->nr_iovas;
+
+ free(info);
+ return ranges;
+}
+
+/* Return iova ranges of the device's IOAS. Free with free() */
+static struct iommu_iova_range *iommufd_iova_ranges(struct iommu *iommu,
+ u32 *nranges)
+{
+ struct iommu_iova_range *ranges;
+ int ret;
+
+ struct iommu_ioas_iova_ranges query = {
+ .size = sizeof(query),
+ .ioas_id = iommu->ioas_id,
+ };
+
+ ret = ioctl(iommu->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
+ VFIO_ASSERT_EQ(ret, -1);
+ VFIO_ASSERT_EQ(errno, EMSGSIZE);
+ VFIO_ASSERT_GT(query.num_iovas, 0);
+
+ ranges = calloc(query.num_iovas, sizeof(*ranges));
+ VFIO_ASSERT_NOT_NULL(ranges);
+
+ query.allowed_iovas = (uintptr_t)ranges;
+
+ ioctl_assert(iommu->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
+ *nranges = query.num_iovas;
+
+ return ranges;
+}
+
+static int iova_range_comp(const void *a, const void *b)
+{
+ const struct iommu_iova_range *ra = a, *rb = b;
+
+ if (ra->start < rb->start)
+ return -1;
+
+ if (ra->start > rb->start)
+ return 1;
+
+ return 0;
+}
+
+/* Return sorted IOVA ranges of the device. Free with free(). */
+struct iommu_iova_range *iommu_iova_ranges(struct iommu *iommu, u32 *nranges)
+{
+ struct iommu_iova_range *ranges;
+
+ if (iommu->iommufd)
+ ranges = iommufd_iova_ranges(iommu, nranges);
+ else
+ ranges = vfio_iommu_iova_ranges(iommu, nranges);
+
+ if (!ranges)
+ return NULL;
+
+ VFIO_ASSERT_GT(*nranges, 0);
+
+ /* Sort and check that ranges are sane and non-overlapping */
+ qsort(ranges, *nranges, sizeof(*ranges), iova_range_comp);
+ VFIO_ASSERT_LT(ranges[0].start, ranges[0].last);
+
+ for (u32 i = 1; i < *nranges; i++) {
+ VFIO_ASSERT_LT(ranges[i].start, ranges[i].last);
+ VFIO_ASSERT_LT(ranges[i - 1].last, ranges[i].start);
+ }
+
+ return ranges;
+}
+
+static u32 iommufd_ioas_alloc(int iommufd)
+{
+ struct iommu_ioas_alloc args = {
+ .size = sizeof(args),
+ };
+
+ ioctl_assert(iommufd, IOMMU_IOAS_ALLOC, &args);
+ return args.out_ioas_id;
+}
+
+struct iommu *iommu_init(const char *iommu_mode)
+{
+ const char *container_path;
+ struct iommu *iommu;
+ int version;
+
+ iommu = calloc(1, sizeof(*iommu));
+ VFIO_ASSERT_NOT_NULL(iommu);
+
+ INIT_LIST_HEAD(&iommu->dma_regions);
+
+ iommu->mode = lookup_iommu_mode(iommu_mode);
+
+ container_path = iommu->mode->container_path;
+ if (container_path) {
+ iommu->container_fd = open(container_path, O_RDWR);
+ VFIO_ASSERT_GE(iommu->container_fd, 0, "open(%s) failed\n", container_path);
+
+ version = ioctl(iommu->container_fd, VFIO_GET_API_VERSION);
+ VFIO_ASSERT_EQ(version, VFIO_API_VERSION, "Unsupported version: %d\n", version);
+ } else {
+ /*
+ * Require device->iommufd to be >0 so that a simple non-0 check can be
+ * used to check if iommufd is enabled. In practice open() will never
+ * return 0 unless stdin is closed.
+ */
+ iommu->iommufd = open("/dev/iommu", O_RDWR);
+ VFIO_ASSERT_GT(iommu->iommufd, 0);
+
+ iommu->ioas_id = iommufd_ioas_alloc(iommu->iommufd);
+ }
+
+ return iommu;
+}
+
+void iommu_cleanup(struct iommu *iommu)
+{
+ if (iommu->iommufd)
+ VFIO_ASSERT_EQ(close(iommu->iommufd), 0);
+ else
+ VFIO_ASSERT_EQ(close(iommu->container_fd), 0);
+
+ free(iommu);
+}
diff --git a/tools/testing/selftests/vfio/lib/iova_allocator.c b/tools/testing/selftests/vfio/lib/iova_allocator.c
new file mode 100644
index 000000000000..8c1cc86b70cd
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/iova_allocator.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <dirent.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <linux/iommufd.h>
+#include <linux/limits.h>
+#include <linux/mman.h>
+#include <linux/overflow.h>
+#include <linux/types.h>
+#include <linux/vfio.h>
+
+#include <libvfio.h>
+
+struct iova_allocator *iova_allocator_init(struct iommu *iommu)
+{
+ struct iova_allocator *allocator;
+ struct iommu_iova_range *ranges;
+ u32 nranges;
+
+ ranges = iommu_iova_ranges(iommu, &nranges);
+ VFIO_ASSERT_NOT_NULL(ranges);
+
+ allocator = malloc(sizeof(*allocator));
+ VFIO_ASSERT_NOT_NULL(allocator);
+
+ *allocator = (struct iova_allocator){
+ .ranges = ranges,
+ .nranges = nranges,
+ .range_idx = 0,
+ .range_offset = 0,
+ };
+
+ return allocator;
+}
+
+void iova_allocator_cleanup(struct iova_allocator *allocator)
+{
+ free(allocator->ranges);
+ free(allocator);
+}
+
+iova_t iova_allocator_alloc(struct iova_allocator *allocator, size_t size)
+{
+ VFIO_ASSERT_GT(size, 0, "Invalid size arg, zero\n");
+ VFIO_ASSERT_EQ(size & (size - 1), 0, "Invalid size arg, non-power-of-2\n");
+
+ for (;;) {
+ struct iommu_iova_range *range;
+ iova_t iova, last;
+
+ VFIO_ASSERT_LT(allocator->range_idx, allocator->nranges,
+ "IOVA allocator out of space\n");
+
+ range = &allocator->ranges[allocator->range_idx];
+ iova = range->start + allocator->range_offset;
+
+ /* Check for sufficient space at the current offset */
+ if (check_add_overflow(iova, size - 1, &last) ||
+ last > range->last)
+ goto next_range;
+
+ /* Align iova to size */
+ iova = last & ~(size - 1);
+
+ /* Check for sufficient space at the aligned iova */
+ if (check_add_overflow(iova, size - 1, &last) ||
+ last > range->last)
+ goto next_range;
+
+ if (last == range->last) {
+ allocator->range_idx++;
+ allocator->range_offset = 0;
+ } else {
+ allocator->range_offset = last - range->start + 1;
+ }
+
+ return iova;
+
+next_range:
+ allocator->range_idx++;
+ allocator->range_offset = 0;
+ }
+}
+
diff --git a/tools/testing/selftests/vfio/lib/libvfio.c b/tools/testing/selftests/vfio/lib/libvfio.c
new file mode 100644
index 000000000000..3a3d1ed635c1
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/libvfio.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include <linux/align.h>
+
+#include "../../../kselftest.h"
+#include <libvfio.h>
+
+static bool is_bdf(const char *str)
+{
+ unsigned int s, b, d, f;
+ int length, count;
+
+ count = sscanf(str, "%4x:%2x:%2x.%2x%n", &s, &b, &d, &f, &length);
+ return count == 4 && length == strlen(str);
+}
+
+static char **get_bdfs_cmdline(int *argc, char *argv[], int *nr_bdfs)
+{
+ int i;
+
+ for (i = *argc - 1; i > 0 && is_bdf(argv[i]); i--)
+ continue;
+
+ i++;
+ *nr_bdfs = *argc - i;
+ *argc -= *nr_bdfs;
+
+ return *nr_bdfs ? &argv[i] : NULL;
+}
+
+static char *get_bdf_env(void)
+{
+ char *bdf;
+
+ bdf = getenv("VFIO_SELFTESTS_BDF");
+ if (!bdf)
+ return NULL;
+
+ VFIO_ASSERT_TRUE(is_bdf(bdf), "Invalid BDF: %s\n", bdf);
+ return bdf;
+}
+
+char **vfio_selftests_get_bdfs(int *argc, char *argv[], int *nr_bdfs)
+{
+ static char *env_bdf;
+ char **bdfs;
+
+ bdfs = get_bdfs_cmdline(argc, argv, nr_bdfs);
+ if (bdfs)
+ return bdfs;
+
+ env_bdf = get_bdf_env();
+ if (env_bdf) {
+ *nr_bdfs = 1;
+ return &env_bdf;
+ }
+
+ fprintf(stderr, "Unable to determine which device(s) to use, skipping test.\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, "To pass the device address via environment variable:\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, " export VFIO_SELFTESTS_BDF=\"segment:bus:device.function\"\n");
+ fprintf(stderr, " %s [options]\n", argv[0]);
+ fprintf(stderr, "\n");
+ fprintf(stderr, "To pass the device address(es) via argv:\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, " %s [options] segment:bus:device.function ...\n", argv[0]);
+ fprintf(stderr, "\n");
+ exit(KSFT_SKIP);
+}
+
+const char *vfio_selftests_get_bdf(int *argc, char *argv[])
+{
+ int nr_bdfs;
+
+ return vfio_selftests_get_bdfs(argc, argv, &nr_bdfs)[0];
+}
+
+void *mmap_reserve(size_t size, size_t align, size_t offset)
+{
+ void *map_base, *map_align;
+ size_t delta;
+
+ VFIO_ASSERT_GT(align, offset);
+ delta = align - offset;
+
+ map_base = mmap(NULL, size + align, PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ VFIO_ASSERT_NE(map_base, MAP_FAILED);
+
+ map_align = (void *)(ALIGN((uintptr_t)map_base + delta, align) - delta);
+
+ if (map_align > map_base)
+ VFIO_ASSERT_EQ(munmap(map_base, map_align - map_base), 0);
+
+ VFIO_ASSERT_EQ(munmap(map_align + size, map_base + align - map_align), 0);
+
+ return map_align;
+}
diff --git a/tools/testing/selftests/vfio/lib/libvfio.mk b/tools/testing/selftests/vfio/lib/libvfio.mk
new file mode 100644
index 000000000000..9f47bceed16f
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/libvfio.mk
@@ -0,0 +1,29 @@
+include $(top_srcdir)/scripts/subarch.include
+ARCH ?= $(SUBARCH)
+
+LIBVFIO_SRCDIR := $(selfdir)/vfio/lib
+
+LIBVFIO_C := iommu.c
+LIBVFIO_C += iova_allocator.c
+LIBVFIO_C += libvfio.c
+LIBVFIO_C += vfio_pci_device.c
+LIBVFIO_C += vfio_pci_driver.c
+
+ifeq ($(ARCH:x86_64=x86),x86)
+LIBVFIO_C += drivers/ioat/ioat.c
+LIBVFIO_C += drivers/dsa/dsa.c
+endif
+
+LIBVFIO_OUTPUT := $(OUTPUT)/libvfio
+
+LIBVFIO_O := $(patsubst %.c, $(LIBVFIO_OUTPUT)/%.o, $(LIBVFIO_C))
+
+LIBVFIO_O_DIRS := $(shell dirname $(LIBVFIO_O) | uniq)
+$(shell mkdir -p $(LIBVFIO_O_DIRS))
+
+CFLAGS += -I$(LIBVFIO_SRCDIR)/include
+
+$(LIBVFIO_O): $(LIBVFIO_OUTPUT)/%.o : $(LIBVFIO_SRCDIR)/%.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+EXTRA_CLEAN += $(LIBVFIO_OUTPUT)
diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
new file mode 100644
index 000000000000..fc75e04ef010
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
@@ -0,0 +1,401 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <dirent.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <linux/align.h>
+#include <linux/iommufd.h>
+#include <linux/kernel.h>
+#include <linux/limits.h>
+#include <linux/log2.h>
+#include <linux/mman.h>
+#include <linux/overflow.h>
+#include <linux/sizes.h>
+#include <linux/types.h>
+#include <linux/vfio.h>
+
+#include "kselftest.h"
+#include <libvfio.h>
+
+#define PCI_SYSFS_PATH "/sys/bus/pci/devices"
+
+static void vfio_pci_irq_set(struct vfio_pci_device *device,
+ u32 index, u32 vector, u32 count, int *fds)
+{
+ u8 buf[sizeof(struct vfio_irq_set) + sizeof(int) * count];
+ struct vfio_irq_set *irq = (void *)&buf;
+ int *irq_fds = (void *)&irq->data;
+
+ memset(buf, 0, sizeof(buf));
+
+ irq->argsz = sizeof(buf);
+ irq->flags = VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq->index = index;
+ irq->start = vector;
+ irq->count = count;
+
+ if (count) {
+ irq->flags |= VFIO_IRQ_SET_DATA_EVENTFD;
+ memcpy(irq_fds, fds, sizeof(int) * count);
+ } else {
+ irq->flags |= VFIO_IRQ_SET_DATA_NONE;
+ }
+
+ ioctl_assert(device->fd, VFIO_DEVICE_SET_IRQS, irq);
+}
+
+void vfio_pci_irq_trigger(struct vfio_pci_device *device, u32 index, u32 vector)
+{
+ struct vfio_irq_set irq = {
+ .argsz = sizeof(irq),
+ .flags = VFIO_IRQ_SET_ACTION_TRIGGER | VFIO_IRQ_SET_DATA_NONE,
+ .index = index,
+ .start = vector,
+ .count = 1,
+ };
+
+ ioctl_assert(device->fd, VFIO_DEVICE_SET_IRQS, &irq);
+}
+
+static void check_supported_irq_index(u32 index)
+{
+ /* VFIO selftests only supports MSI and MSI-x for now. */
+ VFIO_ASSERT_TRUE(index == VFIO_PCI_MSI_IRQ_INDEX ||
+ index == VFIO_PCI_MSIX_IRQ_INDEX,
+ "Unsupported IRQ index: %u\n", index);
+}
+
+void vfio_pci_irq_enable(struct vfio_pci_device *device, u32 index, u32 vector,
+ int count)
+{
+ int i;
+
+ check_supported_irq_index(index);
+
+ for (i = vector; i < vector + count; i++) {
+ VFIO_ASSERT_LT(device->msi_eventfds[i], 0);
+ device->msi_eventfds[i] = eventfd(0, 0);
+ VFIO_ASSERT_GE(device->msi_eventfds[i], 0);
+ }
+
+ vfio_pci_irq_set(device, index, vector, count, device->msi_eventfds + vector);
+}
+
+void vfio_pci_irq_disable(struct vfio_pci_device *device, u32 index)
+{
+ int i;
+
+ check_supported_irq_index(index);
+
+ for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) {
+ if (device->msi_eventfds[i] < 0)
+ continue;
+
+ VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0);
+ device->msi_eventfds[i] = -1;
+ }
+
+ vfio_pci_irq_set(device, index, 0, 0, NULL);
+}
+
+static void vfio_pci_irq_get(struct vfio_pci_device *device, u32 index,
+ struct vfio_irq_info *irq_info)
+{
+ irq_info->argsz = sizeof(*irq_info);
+ irq_info->index = index;
+
+ ioctl_assert(device->fd, VFIO_DEVICE_GET_IRQ_INFO, irq_info);
+}
+
+static void vfio_pci_region_get(struct vfio_pci_device *device, int index,
+ struct vfio_region_info *info)
+{
+ memset(info, 0, sizeof(*info));
+
+ info->argsz = sizeof(*info);
+ info->index = index;
+
+ ioctl_assert(device->fd, VFIO_DEVICE_GET_REGION_INFO, info);
+}
+
+static void vfio_pci_bar_map(struct vfio_pci_device *device, int index)
+{
+ struct vfio_pci_bar *bar = &device->bars[index];
+ size_t align, size;
+ int prot = 0;
+ void *vaddr;
+
+ VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS);
+ VFIO_ASSERT_NULL(bar->vaddr);
+ VFIO_ASSERT_TRUE(bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP);
+ VFIO_ASSERT_TRUE(is_power_of_2(bar->info.size));
+
+ if (bar->info.flags & VFIO_REGION_INFO_FLAG_READ)
+ prot |= PROT_READ;
+ if (bar->info.flags & VFIO_REGION_INFO_FLAG_WRITE)
+ prot |= PROT_WRITE;
+
+ size = bar->info.size;
+
+ /*
+ * Align BAR mmaps to improve page fault granularity during potential
+ * subsequent IOMMU mapping of these BAR vaddr. 1G for x86 is the
+ * largest hugepage size across any architecture, so no benefit from
+ * larger alignment. BARs smaller than 1G will be aligned by their
+ * power-of-two size, guaranteeing sufficient alignment for smaller
+ * hugepages, if present.
+ */
+ align = min_t(size_t, size, SZ_1G);
+
+ vaddr = mmap_reserve(size, align, 0);
+ bar->vaddr = mmap(vaddr, size, prot, MAP_SHARED | MAP_FIXED,
+ device->fd, bar->info.offset);
+ VFIO_ASSERT_NE(bar->vaddr, MAP_FAILED);
+
+ madvise(bar->vaddr, size, MADV_HUGEPAGE);
+}
+
+static void vfio_pci_bar_unmap(struct vfio_pci_device *device, int index)
+{
+ struct vfio_pci_bar *bar = &device->bars[index];
+
+ VFIO_ASSERT_LT(index, PCI_STD_NUM_BARS);
+ VFIO_ASSERT_NOT_NULL(bar->vaddr);
+
+ VFIO_ASSERT_EQ(munmap(bar->vaddr, bar->info.size), 0);
+ bar->vaddr = NULL;
+}
+
+static void vfio_pci_bar_unmap_all(struct vfio_pci_device *device)
+{
+ int i;
+
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+ if (device->bars[i].vaddr)
+ vfio_pci_bar_unmap(device, i);
+ }
+}
+
+void vfio_pci_config_access(struct vfio_pci_device *device, bool write,
+ size_t config, size_t size, void *data)
+{
+ struct vfio_region_info *config_space = &device->config_space;
+ int ret;
+
+ if (write)
+ ret = pwrite(device->fd, data, size, config_space->offset + config);
+ else
+ ret = pread(device->fd, data, size, config_space->offset + config);
+
+ VFIO_ASSERT_EQ(ret, size, "Failed to %s PCI config space: 0x%lx\n",
+ write ? "write to" : "read from", config);
+}
+
+void vfio_pci_device_reset(struct vfio_pci_device *device)
+{
+ ioctl_assert(device->fd, VFIO_DEVICE_RESET, NULL);
+}
+
+static unsigned int vfio_pci_get_group_from_dev(const char *bdf)
+{
+ char dev_iommu_group_path[PATH_MAX] = {0};
+ char sysfs_path[PATH_MAX] = {0};
+ unsigned int group;
+ int ret;
+
+ snprintf(sysfs_path, PATH_MAX, "%s/%s/iommu_group", PCI_SYSFS_PATH, bdf);
+
+ ret = readlink(sysfs_path, dev_iommu_group_path, sizeof(dev_iommu_group_path));
+ VFIO_ASSERT_NE(ret, -1, "Failed to get the IOMMU group for device: %s\n", bdf);
+
+ ret = sscanf(basename(dev_iommu_group_path), "%u", &group);
+ VFIO_ASSERT_EQ(ret, 1, "Failed to get the IOMMU group for device: %s\n", bdf);
+
+ return group;
+}
+
+static void vfio_pci_group_setup(struct vfio_pci_device *device, const char *bdf)
+{
+ struct vfio_group_status group_status = {
+ .argsz = sizeof(group_status),
+ };
+ char group_path[32];
+ int group;
+
+ group = vfio_pci_get_group_from_dev(bdf);
+ snprintf(group_path, sizeof(group_path), "/dev/vfio/%d", group);
+
+ device->group_fd = open(group_path, O_RDWR);
+ VFIO_ASSERT_GE(device->group_fd, 0, "open(%s) failed\n", group_path);
+
+ ioctl_assert(device->group_fd, VFIO_GROUP_GET_STATUS, &group_status);
+ VFIO_ASSERT_TRUE(group_status.flags & VFIO_GROUP_FLAGS_VIABLE);
+
+ ioctl_assert(device->group_fd, VFIO_GROUP_SET_CONTAINER, &device->iommu->container_fd);
+}
+
+static void vfio_pci_container_setup(struct vfio_pci_device *device, const char *bdf)
+{
+ struct iommu *iommu = device->iommu;
+ unsigned long iommu_type = iommu->mode->iommu_type;
+ int ret;
+
+ vfio_pci_group_setup(device, bdf);
+
+ ret = ioctl(iommu->container_fd, VFIO_CHECK_EXTENSION, iommu_type);
+ VFIO_ASSERT_GT(ret, 0, "VFIO IOMMU type %lu not supported\n", iommu_type);
+
+ /*
+ * Allow multiple threads to race to set the IOMMU type on the
+ * container. The first will succeed and the rest should fail
+ * because the IOMMU type is already set.
+ */
+ (void)ioctl(iommu->container_fd, VFIO_SET_IOMMU, (void *)iommu_type);
+
+ device->fd = ioctl(device->group_fd, VFIO_GROUP_GET_DEVICE_FD, bdf);
+ VFIO_ASSERT_GE(device->fd, 0);
+}
+
+static void vfio_pci_device_setup(struct vfio_pci_device *device)
+{
+ int i;
+
+ device->info.argsz = sizeof(device->info);
+ ioctl_assert(device->fd, VFIO_DEVICE_GET_INFO, &device->info);
+
+ vfio_pci_region_get(device, VFIO_PCI_CONFIG_REGION_INDEX, &device->config_space);
+
+ /* Sanity check VFIO does not advertise mmap for config space */
+ VFIO_ASSERT_TRUE(!(device->config_space.flags & VFIO_REGION_INFO_FLAG_MMAP),
+ "PCI config space should not support mmap()\n");
+
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+ struct vfio_pci_bar *bar = device->bars + i;
+
+ vfio_pci_region_get(device, i, &bar->info);
+ if (bar->info.flags & VFIO_REGION_INFO_FLAG_MMAP)
+ vfio_pci_bar_map(device, i);
+ }
+
+ vfio_pci_irq_get(device, VFIO_PCI_MSI_IRQ_INDEX, &device->msi_info);
+ vfio_pci_irq_get(device, VFIO_PCI_MSIX_IRQ_INDEX, &device->msix_info);
+
+ for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++)
+ device->msi_eventfds[i] = -1;
+}
+
+const char *vfio_pci_get_cdev_path(const char *bdf)
+{
+ char dir_path[PATH_MAX];
+ struct dirent *entry;
+ char *cdev_path;
+ DIR *dir;
+
+ cdev_path = calloc(PATH_MAX, 1);
+ VFIO_ASSERT_NOT_NULL(cdev_path);
+
+ snprintf(dir_path, sizeof(dir_path), "/sys/bus/pci/devices/%s/vfio-dev/", bdf);
+
+ dir = opendir(dir_path);
+ VFIO_ASSERT_NOT_NULL(dir, "Failed to open directory %s\n", dir_path);
+
+ while ((entry = readdir(dir)) != NULL) {
+ /* Find the file that starts with "vfio" */
+ if (strncmp("vfio", entry->d_name, 4))
+ continue;
+
+ snprintf(cdev_path, PATH_MAX, "/dev/vfio/devices/%s", entry->d_name);
+ break;
+ }
+
+ VFIO_ASSERT_NE(cdev_path[0], 0, "Failed to find vfio cdev file.\n");
+ VFIO_ASSERT_EQ(closedir(dir), 0);
+
+ return cdev_path;
+}
+
+static void vfio_device_bind_iommufd(int device_fd, int iommufd)
+{
+ struct vfio_device_bind_iommufd args = {
+ .argsz = sizeof(args),
+ .iommufd = iommufd,
+ };
+
+ ioctl_assert(device_fd, VFIO_DEVICE_BIND_IOMMUFD, &args);
+}
+
+static void vfio_device_attach_iommufd_pt(int device_fd, u32 pt_id)
+{
+ struct vfio_device_attach_iommufd_pt args = {
+ .argsz = sizeof(args),
+ .pt_id = pt_id,
+ };
+
+ ioctl_assert(device_fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &args);
+}
+
+static void vfio_pci_iommufd_setup(struct vfio_pci_device *device, const char *bdf)
+{
+ const char *cdev_path = vfio_pci_get_cdev_path(bdf);
+
+ device->fd = open(cdev_path, O_RDWR);
+ VFIO_ASSERT_GE(device->fd, 0);
+ free((void *)cdev_path);
+
+ vfio_device_bind_iommufd(device->fd, device->iommu->iommufd);
+ vfio_device_attach_iommufd_pt(device->fd, device->iommu->ioas_id);
+}
+
+struct vfio_pci_device *vfio_pci_device_init(const char *bdf, struct iommu *iommu)
+{
+ struct vfio_pci_device *device;
+
+ device = calloc(1, sizeof(*device));
+ VFIO_ASSERT_NOT_NULL(device);
+
+ VFIO_ASSERT_NOT_NULL(iommu);
+ device->iommu = iommu;
+ device->bdf = bdf;
+
+ if (iommu->mode->container_path)
+ vfio_pci_container_setup(device, bdf);
+ else
+ vfio_pci_iommufd_setup(device, bdf);
+
+ vfio_pci_device_setup(device);
+ vfio_pci_driver_probe(device);
+
+ return device;
+}
+
+void vfio_pci_device_cleanup(struct vfio_pci_device *device)
+{
+ int i;
+
+ if (device->driver.initialized)
+ vfio_pci_driver_remove(device);
+
+ vfio_pci_bar_unmap_all(device);
+
+ VFIO_ASSERT_EQ(close(device->fd), 0);
+
+ for (i = 0; i < ARRAY_SIZE(device->msi_eventfds); i++) {
+ if (device->msi_eventfds[i] < 0)
+ continue;
+
+ VFIO_ASSERT_EQ(close(device->msi_eventfds[i]), 0);
+ }
+
+ if (device->group_fd)
+ VFIO_ASSERT_EQ(close(device->group_fd), 0);
+
+ free(device);
+}
diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_driver.c b/tools/testing/selftests/vfio/lib/vfio_pci_driver.c
new file mode 100644
index 000000000000..6827f4a6febe
--- /dev/null
+++ b/tools/testing/selftests/vfio/lib/vfio_pci_driver.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "kselftest.h"
+#include <libvfio.h>
+
+#ifdef __x86_64__
+extern struct vfio_pci_driver_ops dsa_ops;
+extern struct vfio_pci_driver_ops ioat_ops;
+#endif
+
+static struct vfio_pci_driver_ops *driver_ops[] = {
+#ifdef __x86_64__
+ &dsa_ops,
+ &ioat_ops,
+#endif
+};
+
+void vfio_pci_driver_probe(struct vfio_pci_device *device)
+{
+ struct vfio_pci_driver_ops *ops;
+ int i;
+
+ VFIO_ASSERT_NULL(device->driver.ops);
+
+ for (i = 0; i < ARRAY_SIZE(driver_ops); i++) {
+ ops = driver_ops[i];
+
+ if (ops->probe(device))
+ continue;
+
+ device->driver.ops = ops;
+ }
+}
+
+static void vfio_check_driver_op(struct vfio_pci_driver *driver, void *op,
+ const char *op_name)
+{
+ VFIO_ASSERT_NOT_NULL(driver->ops);
+ VFIO_ASSERT_NOT_NULL(op, "Driver has no %s()\n", op_name);
+ VFIO_ASSERT_EQ(driver->initialized, op != driver->ops->init);
+ VFIO_ASSERT_EQ(driver->memcpy_in_progress, op == driver->ops->memcpy_wait);
+}
+
+#define VFIO_CHECK_DRIVER_OP(_driver, _op) do { \
+ struct vfio_pci_driver *__driver = (_driver); \
+ vfio_check_driver_op(__driver, __driver->ops->_op, #_op); \
+} while (0)
+
+void vfio_pci_driver_init(struct vfio_pci_device *device)
+{
+ struct vfio_pci_driver *driver = &device->driver;
+
+ VFIO_ASSERT_NOT_NULL(driver->region.vaddr);
+ VFIO_CHECK_DRIVER_OP(driver, init);
+
+ driver->ops->init(device);
+
+ driver->initialized = true;
+}
+
+void vfio_pci_driver_remove(struct vfio_pci_device *device)
+{
+ struct vfio_pci_driver *driver = &device->driver;
+
+ VFIO_CHECK_DRIVER_OP(driver, remove);
+
+ driver->ops->remove(device);
+ driver->initialized = false;
+}
+
+void vfio_pci_driver_send_msi(struct vfio_pci_device *device)
+{
+ struct vfio_pci_driver *driver = &device->driver;
+
+ VFIO_CHECK_DRIVER_OP(driver, send_msi);
+
+ driver->ops->send_msi(device);
+}
+
+void vfio_pci_driver_memcpy_start(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size,
+ u64 count)
+{
+ struct vfio_pci_driver *driver = &device->driver;
+
+ VFIO_ASSERT_LE(size, driver->max_memcpy_size);
+ VFIO_ASSERT_LE(count, driver->max_memcpy_count);
+ VFIO_CHECK_DRIVER_OP(driver, memcpy_start);
+
+ driver->ops->memcpy_start(device, src, dst, size, count);
+ driver->memcpy_in_progress = true;
+}
+
+int vfio_pci_driver_memcpy_wait(struct vfio_pci_device *device)
+{
+ struct vfio_pci_driver *driver = &device->driver;
+ int r;
+
+ VFIO_CHECK_DRIVER_OP(driver, memcpy_wait);
+
+ r = driver->ops->memcpy_wait(device);
+ driver->memcpy_in_progress = false;
+
+ return r;
+}
+
+int vfio_pci_driver_memcpy(struct vfio_pci_device *device,
+ iova_t src, iova_t dst, u64 size)
+{
+ vfio_pci_driver_memcpy_start(device, src, dst, size, 1);
+
+ return vfio_pci_driver_memcpy_wait(device);
+}