summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-10-22 13:00:44 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-10-22 13:00:44 -0700
commitfc996db970a33c74d3db3ee63532b15187258027 (patch)
tree378b28e887bb9f71ae2fffc4b82115ead8da021f
parent60573c2966a1b560fabdffe308d47b6ba5585b15 (diff)
parent2e6cfd496f5b57034cf2aec738799571b5a52124 (diff)
downloadlwn-fc996db970a33c74d3db3ee63532b15187258027.tar.gz
lwn-fc996db970a33c74d3db3ee63532b15187258027.zip
Merge tag 'vfio-v5.10-rc1' of git://github.com/awilliam/linux-vfio
Pull VFIO updates from Alex Williamson: - New fsl-mc vfio bus driver supporting userspace drivers of objects within NXP's DPAA2 architecture (Diana Craciun) - Support for exposing zPCI information on s390 (Matthew Rosato) - Fixes for "detached" VFs on s390 (Matthew Rosato) - Fixes for pin-pages and dma-rw accesses (Yan Zhao) - Cleanups and optimize vconfig regen (Zenghui Yu) - Fix duplicate irq-bypass token registration (Alex Williamson) * tag 'vfio-v5.10-rc1' of git://github.com/awilliam/linux-vfio: (30 commits) vfio iommu type1: Fix memory leak in vfio_iommu_type1_pin_pages vfio/pci: Clear token on bypass registration failure vfio/fsl-mc: fix the return of the uninitialized variable ret vfio/fsl-mc: Fix the dead code in vfio_fsl_mc_set_irq_trigger vfio/fsl-mc: Fixed vfio-fsl-mc driver compilation on 32 bit MAINTAINERS: Add entry for s390 vfio-pci vfio-pci/zdev: Add zPCI capabilities to VFIO_DEVICE_GET_INFO vfio/fsl-mc: Add support for device reset vfio/fsl-mc: Add read/write support for fsl-mc devices vfio/fsl-mc: trigger an interrupt via eventfd vfio/fsl-mc: Add irq infrastructure for fsl-mc devices vfio/fsl-mc: Added lock support in preparation for interrupt handling vfio/fsl-mc: Allow userspace to MMAP fsl-mc device MMIO regions vfio/fsl-mc: Implement VFIO_DEVICE_GET_REGION_INFO ioctl call vfio/fsl-mc: Implement VFIO_DEVICE_GET_INFO ioctl vfio/fsl-mc: Scan DPRC objects on vfio-fsl-mc driver bind vfio: Introduce capability definitions for VFIO_DEVICE_GET_INFO s390/pci: track whether util_str is valid in the zpci_dev s390/pci: stash version in the zpci_dev vfio/fsl-mc: Add VFIO framework skeleton for fsl-mc devices ...
-rw-r--r--MAINTAINERS14
-rw-r--r--arch/s390/include/asm/pci.h4
-rw-r--r--arch/s390/pci/pci_bus.c5
-rw-r--r--arch/s390/pci/pci_clp.c2
-rw-r--r--drivers/pci/iov.c1
-rw-r--r--drivers/vfio/Kconfig1
-rw-r--r--drivers/vfio/Makefile1
-rw-r--r--drivers/vfio/fsl-mc/Kconfig9
-rw-r--r--drivers/vfio/fsl-mc/Makefile4
-rw-r--r--drivers/vfio/fsl-mc/vfio_fsl_mc.c683
-rw-r--r--drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c194
-rw-r--r--drivers/vfio/fsl-mc/vfio_fsl_mc_private.h55
-rw-r--r--drivers/vfio/pci/Kconfig12
-rw-r--r--drivers/vfio/pci/Makefile1
-rw-r--r--drivers/vfio/pci/vfio_pci.c38
-rw-r--r--drivers/vfio/pci/vfio_pci_config.c27
-rw-r--r--drivers/vfio/pci/vfio_pci_intrs.c4
-rw-r--r--drivers/vfio/pci/vfio_pci_private.h12
-rw-r--r--drivers/vfio/pci/vfio_pci_zdev.c143
-rw-r--r--drivers/vfio/vfio.c9
-rw-r--r--drivers/vfio/vfio_iommu_type1.c23
-rw-r--r--include/linux/pci.h1
-rw-r--r--include/uapi/linux/vfio.h29
-rw-r--r--include/uapi/linux/vfio_zdev.h78
24 files changed, 1330 insertions, 20 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 8b82a02e609c..f5570ae5f451 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15287,6 +15287,14 @@ F: Documentation/s390/vfio-ccw.rst
F: drivers/s390/cio/vfio_ccw*
F: include/uapi/linux/vfio_ccw.h
+S390 VFIO-PCI DRIVER
+M: Matthew Rosato <mjrosato@linux.ibm.com>
+L: linux-s390@vger.kernel.org
+L: kvm@vger.kernel.org
+S: Supported
+F: drivers/vfio/pci/vfio_pci_zdev.c
+F: include/uapi/linux/vfio_zdev.h
+
S390 ZCRYPT DRIVER
M: Harald Freudenberger <freude@linux.ibm.com>
L: linux-s390@vger.kernel.org
@@ -18385,6 +18393,12 @@ F: drivers/vfio/
F: include/linux/vfio.h
F: include/uapi/linux/vfio.h
+VFIO FSL-MC DRIVER
+M: Diana Craciun <diana.craciun@oss.nxp.com>
+L: kvm@vger.kernel.org
+S: Maintained
+F: drivers/vfio/fsl-mc/
+
VFIO MEDIATED DEVICE DRIVERS
M: Kirti Wankhede <kwankhede@nvidia.com>
L: kvm@vger.kernel.org
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index b5380a251df2..212628932ddc 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -132,7 +132,8 @@ struct zpci_dev {
u8 rid_available : 1;
u8 has_hp_slot : 1;
u8 is_physfn : 1;
- u8 reserved : 5;
+ u8 util_str_avail : 1;
+ u8 reserved : 4;
unsigned int devfn; /* DEVFN part of the RID*/
struct mutex lock;
@@ -179,6 +180,7 @@ struct zpci_dev {
atomic64_t mapped_pages;
atomic64_t unmapped_pages;
+ u8 version;
enum pci_bus_speed max_bus_speed;
struct dentry *debugfs_dev;
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index 0c0db7c3a404..755b46f4c595 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -135,9 +135,10 @@ void pcibios_bus_add_device(struct pci_dev *pdev)
* With pdev->no_vf_scan the common PCI probing code does not
* perform PF/VF linking.
*/
- if (zdev->vfn)
+ if (zdev->vfn) {
zpci_iov_setup_virtfn(zdev->zbus, pdev, zdev->vfn);
-
+ pdev->no_command_memory = 1;
+ }
}
static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev)
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 5a34a1359dc5..153720d21ae7 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -102,6 +102,7 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
zdev->msi_addr = response->msia;
zdev->max_msi = response->noi;
zdev->fmb_update = response->mui;
+ zdev->version = response->version;
switch (response->version) {
case 1:
@@ -167,6 +168,7 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev,
if (response->util_str_avail) {
memcpy(zdev->util_str, response->util_str,
sizeof(zdev->util_str));
+ zdev->util_str_avail = 1;
}
zdev->mio_capable = response->mio_addr_avail;
for (i = 0; i < PCI_STD_NUM_BARS; i++) {
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index b37e08c4f9d1..4afd4ee4f7f0 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -180,6 +180,7 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id)
virtfn->device = iov->vf_device;
virtfn->is_virtfn = 1;
virtfn->physfn = pci_dev_get(dev);
+ virtfn->no_command_memory = 1;
if (id == 0)
pci_read_vf_config_common(virtfn);
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index fd17db9b432f..5533df91b257 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -47,4 +47,5 @@ menuconfig VFIO_NOIOMMU
source "drivers/vfio/pci/Kconfig"
source "drivers/vfio/platform/Kconfig"
source "drivers/vfio/mdev/Kconfig"
+source "drivers/vfio/fsl-mc/Kconfig"
source "virt/lib/Kconfig"
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index de67c4725cce..fee73f3d9480 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -9,3 +9,4 @@ obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o
obj-$(CONFIG_VFIO_PCI) += pci/
obj-$(CONFIG_VFIO_PLATFORM) += platform/
obj-$(CONFIG_VFIO_MDEV) += mdev/
+obj-$(CONFIG_VFIO_FSL_MC) += fsl-mc/
diff --git a/drivers/vfio/fsl-mc/Kconfig b/drivers/vfio/fsl-mc/Kconfig
new file mode 100644
index 000000000000..b1a527d6b6f2
--- /dev/null
+++ b/drivers/vfio/fsl-mc/Kconfig
@@ -0,0 +1,9 @@
+config VFIO_FSL_MC
+ tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices"
+ depends on VFIO && FSL_MC_BUS && EVENTFD
+ help
+ Driver to enable support for the VFIO QorIQ DPAA2 fsl-mc
+ (Management Complex) devices. This is required to passthrough
+ fsl-mc bus devices using the VFIO framework.
+
+ If you don't know what to do here, say N.
diff --git a/drivers/vfio/fsl-mc/Makefile b/drivers/vfio/fsl-mc/Makefile
new file mode 100644
index 000000000000..cad6dbf0b735
--- /dev/null
+++ b/drivers/vfio/fsl-mc/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+
+vfio-fsl-mc-y := vfio_fsl_mc.o vfio_fsl_mc_intr.o
+obj-$(CONFIG_VFIO_FSL_MC) += vfio-fsl-mc.o
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
new file mode 100644
index 000000000000..0113a980f974
--- /dev/null
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
@@ -0,0 +1,683 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/*
+ * Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2016-2017,2019-2020 NXP
+ */
+
+#include <linux/device.h>
+#include <linux/iommu.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/vfio.h>
+#include <linux/fsl/mc.h>
+#include <linux/delay.h>
+#include <linux/io-64-nonatomic-hi-lo.h>
+
+#include "vfio_fsl_mc_private.h"
+
+static struct fsl_mc_driver vfio_fsl_mc_driver;
+
+static DEFINE_MUTEX(reflck_lock);
+
+static void vfio_fsl_mc_reflck_get(struct vfio_fsl_mc_reflck *reflck)
+{
+ kref_get(&reflck->kref);
+}
+
+static void vfio_fsl_mc_reflck_release(struct kref *kref)
+{
+ struct vfio_fsl_mc_reflck *reflck = container_of(kref,
+ struct vfio_fsl_mc_reflck,
+ kref);
+
+ mutex_destroy(&reflck->lock);
+ kfree(reflck);
+ mutex_unlock(&reflck_lock);
+}
+
+static void vfio_fsl_mc_reflck_put(struct vfio_fsl_mc_reflck *reflck)
+{
+ kref_put_mutex(&reflck->kref, vfio_fsl_mc_reflck_release, &reflck_lock);
+}
+
+static struct vfio_fsl_mc_reflck *vfio_fsl_mc_reflck_alloc(void)
+{
+ struct vfio_fsl_mc_reflck *reflck;
+
+ reflck = kzalloc(sizeof(*reflck), GFP_KERNEL);
+ if (!reflck)
+ return ERR_PTR(-ENOMEM);
+
+ kref_init(&reflck->kref);
+ mutex_init(&reflck->lock);
+
+ return reflck;
+}
+
+static int vfio_fsl_mc_reflck_attach(struct vfio_fsl_mc_device *vdev)
+{
+ int ret = 0;
+
+ mutex_lock(&reflck_lock);
+ if (is_fsl_mc_bus_dprc(vdev->mc_dev)) {
+ vdev->reflck = vfio_fsl_mc_reflck_alloc();
+ ret = PTR_ERR_OR_ZERO(vdev->reflck);
+ } else {
+ struct device *mc_cont_dev = vdev->mc_dev->dev.parent;
+ struct vfio_device *device;
+ struct vfio_fsl_mc_device *cont_vdev;
+
+ device = vfio_device_get_from_dev(mc_cont_dev);
+ if (!device) {
+ ret = -ENODEV;
+ goto unlock;
+ }
+
+ cont_vdev = vfio_device_data(device);
+ if (!cont_vdev || !cont_vdev->reflck) {
+ vfio_device_put(device);
+ ret = -ENODEV;
+ goto unlock;
+ }
+ vfio_fsl_mc_reflck_get(cont_vdev->reflck);
+ vdev->reflck = cont_vdev->reflck;
+ vfio_device_put(device);
+ }
+
+unlock:
+ mutex_unlock(&reflck_lock);
+ return ret;
+}
+
+static int vfio_fsl_mc_regions_init(struct vfio_fsl_mc_device *vdev)
+{
+ struct fsl_mc_device *mc_dev = vdev->mc_dev;
+ int count = mc_dev->obj_desc.region_count;
+ int i;
+
+ vdev->regions = kcalloc(count, sizeof(struct vfio_fsl_mc_region),
+ GFP_KERNEL);
+ if (!vdev->regions)
+ return -ENOMEM;
+
+ for (i = 0; i < count; i++) {
+ struct resource *res = &mc_dev->regions[i];
+ int no_mmap = is_fsl_mc_bus_dprc(mc_dev);
+
+ vdev->regions[i].addr = res->start;
+ vdev->regions[i].size = resource_size(res);
+ vdev->regions[i].type = mc_dev->regions[i].flags & IORESOURCE_BITS;
+ /*
+ * Only regions addressed with PAGE granularity may be
+ * MMAPed securely.
+ */
+ if (!no_mmap && !(vdev->regions[i].addr & ~PAGE_MASK) &&
+ !(vdev->regions[i].size & ~PAGE_MASK))
+ vdev->regions[i].flags |=
+ VFIO_REGION_INFO_FLAG_MMAP;
+ vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_READ;
+ if (!(mc_dev->regions[i].flags & IORESOURCE_READONLY))
+ vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_WRITE;
+ }
+
+ return 0;
+}
+
+static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device *vdev)
+{
+ struct fsl_mc_device *mc_dev = vdev->mc_dev;
+ int i;
+
+ for (i = 0; i < mc_dev->obj_desc.region_count; i++)
+ iounmap(vdev->regions[i].ioaddr);
+ kfree(vdev->regions);
+}
+
+static int vfio_fsl_mc_open(void *device_data)
+{
+ struct vfio_fsl_mc_device *vdev = device_data;
+ int ret;
+
+ if (!try_module_get(THIS_MODULE))
+ return -ENODEV;
+
+ mutex_lock(&vdev->reflck->lock);
+ if (!vdev->refcnt) {
+ ret = vfio_fsl_mc_regions_init(vdev);
+ if (ret)
+ goto err_reg_init;
+ }
+ vdev->refcnt++;
+
+ mutex_unlock(&vdev->reflck->lock);
+
+ return 0;
+
+err_reg_init:
+ mutex_unlock(&vdev->reflck->lock);
+ module_put(THIS_MODULE);
+ return ret;
+}
+
+static void vfio_fsl_mc_release(void *device_data)
+{
+ struct vfio_fsl_mc_device *vdev = device_data;
+ int ret;
+
+ mutex_lock(&vdev->reflck->lock);
+
+ if (!(--vdev->refcnt)) {
+ struct fsl_mc_device *mc_dev = vdev->mc_dev;
+ struct device *cont_dev = fsl_mc_cont_dev(&mc_dev->dev);
+ struct fsl_mc_device *mc_cont = to_fsl_mc_device(cont_dev);
+
+ vfio_fsl_mc_regions_cleanup(vdev);
+
+ /* reset the device before cleaning up the interrupts */
+ ret = dprc_reset_container(mc_cont->mc_io, 0,
+ mc_cont->mc_handle,
+ mc_cont->obj_desc.id,
+ DPRC_RESET_OPTION_NON_RECURSIVE);
+
+ if (ret) {
+ dev_warn(&mc_cont->dev, "VFIO_FLS_MC: reset device has failed (%d)\n",
+ ret);
+ WARN_ON(1);
+ }
+
+ vfio_fsl_mc_irqs_cleanup(vdev);
+
+ fsl_mc_cleanup_irq_pool(mc_cont);
+ }
+
+ mutex_unlock(&vdev->reflck->lock);
+
+ module_put(THIS_MODULE);
+}
+
+static long vfio_fsl_mc_ioctl(void *device_data, unsigned int cmd,
+ unsigned long arg)
+{
+ unsigned long minsz;
+ struct vfio_fsl_mc_device *vdev = device_data;
+ struct fsl_mc_device *mc_dev = vdev->mc_dev;
+
+ switch (cmd) {
+ case VFIO_DEVICE_GET_INFO:
+ {
+ struct vfio_device_info info;
+
+ minsz = offsetofend(struct vfio_device_info, num_irqs);
+
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if (info.argsz < minsz)
+ return -EINVAL;
+
+ info.flags = VFIO_DEVICE_FLAGS_FSL_MC;
+
+ if (is_fsl_mc_bus_dprc(mc_dev))
+ info.flags |= VFIO_DEVICE_FLAGS_RESET;
+
+ info.num_regions = mc_dev->obj_desc.region_count;
+ info.num_irqs = mc_dev->obj_desc.irq_count;
+
+ return copy_to_user((void __user *)arg, &info, minsz) ?
+ -EFAULT : 0;
+ }
+ case VFIO_DEVICE_GET_REGION_INFO:
+ {
+ struct vfio_region_info info;
+
+ minsz = offsetofend(struct vfio_region_info, offset);
+
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if (info.argsz < minsz)
+ return -EINVAL;
+
+ if (info.index >= mc_dev->obj_desc.region_count)
+ return -EINVAL;
+
+ /* map offset to the physical address */
+ info.offset = VFIO_FSL_MC_INDEX_TO_OFFSET(info.index);
+ info.size = vdev->regions[info.index].size;
+ info.flags = vdev->regions[info.index].flags;
+
+ return copy_to_user((void __user *)arg, &info, minsz);
+ }
+ case VFIO_DEVICE_GET_IRQ_INFO:
+ {
+ struct vfio_irq_info info;
+
+ minsz = offsetofend(struct vfio_irq_info, count);
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if (info.argsz < minsz)
+ return -EINVAL;
+
+ if (info.index >= mc_dev->obj_desc.irq_count)
+ return -EINVAL;
+
+ info.flags = VFIO_IRQ_INFO_EVENTFD;
+ info.count = 1;
+
+ return copy_to_user((void __user *)arg, &info, minsz);
+ }
+ case VFIO_DEVICE_SET_IRQS:
+ {
+ struct vfio_irq_set hdr;
+ u8 *data = NULL;
+ int ret = 0;
+ size_t data_size = 0;
+
+ minsz = offsetofend(struct vfio_irq_set, count);
+
+ if (copy_from_user(&hdr, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ ret = vfio_set_irqs_validate_and_prepare(&hdr, mc_dev->obj_desc.irq_count,
+ mc_dev->obj_desc.irq_count, &data_size);
+ if (ret)
+ return ret;
+
+ if (data_size) {
+ data = memdup_user((void __user *)(arg + minsz),
+ data_size);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+ }
+
+ mutex_lock(&vdev->igate);
+ ret = vfio_fsl_mc_set_irqs_ioctl(vdev, hdr.flags,
+ hdr.index, hdr.start,
+ hdr.count, data);
+ mutex_unlock(&vdev->igate);
+ kfree(data);
+
+ return ret;
+ }
+ case VFIO_DEVICE_RESET:
+ {
+ int ret;
+ struct fsl_mc_device *mc_dev = vdev->mc_dev;
+
+ /* reset is supported only for the DPRC */
+ if (!is_fsl_mc_bus_dprc(mc_dev))
+ return -ENOTTY;
+
+ ret = dprc_reset_container(mc_dev->mc_io, 0,
+ mc_dev->mc_handle,
+ mc_dev->obj_desc.id,
+ DPRC_RESET_OPTION_NON_RECURSIVE);
+ return ret;
+
+ }
+ default:
+ return -ENOTTY;
+ }
+}
+
+static ssize_t vfio_fsl_mc_read(void *device_data, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct vfio_fsl_mc_device *vdev = device_data;
+ unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
+ loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
+ struct fsl_mc_device *mc_dev = vdev->mc_dev;
+ struct vfio_fsl_mc_region *region;
+ u64 data[8];
+ int i;
+
+ if (index >= mc_dev->obj_desc.region_count)
+ return -EINVAL;
+
+ region = &vdev->regions[index];
+
+ if (!(region->flags & VFIO_REGION_INFO_FLAG_READ))
+ return -EINVAL;
+
+ if (!region->ioaddr) {
+ region->ioaddr = ioremap(region->addr, region->size);
+ if (!region->ioaddr)
+ return -ENOMEM;
+ }
+
+ if (count != 64 || off != 0)
+ return -EINVAL;
+
+ for (i = 7; i >= 0; i--)
+ data[i] = readq(region->ioaddr + i * sizeof(uint64_t));
+
+ if (copy_to_user(buf, data, 64))
+ return -EFAULT;
+
+ return count;
+}
+
+#define MC_CMD_COMPLETION_TIMEOUT_MS 5000
+#define MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS 500
+
+static int vfio_fsl_mc_send_command(void __iomem *ioaddr, uint64_t *cmd_data)
+{
+ int i;
+ enum mc_cmd_status status;
+ unsigned long timeout_usecs = MC_CMD_COMPLETION_TIMEOUT_MS * 1000;
+
+ /* Write at command parameter into portal */
+ for (i = 7; i >= 1; i--)
+ writeq_relaxed(cmd_data[i], ioaddr + i * sizeof(uint64_t));
+
+ /* Write command header in the end */
+ writeq(cmd_data[0], ioaddr);
+
+ /* Wait for response before returning to user-space
+ * This can be optimized in future to even prepare response
+ * before returning to user-space and avoid read ioctl.
+ */
+ for (;;) {
+ u64 header;
+ struct mc_cmd_header *resp_hdr;
+
+ header = cpu_to_le64(readq_relaxed(ioaddr));
+
+ resp_hdr = (struct mc_cmd_header *)&header;
+ status = (enum mc_cmd_status)resp_hdr->status;
+ if (status != MC_CMD_STATUS_READY)
+ break;
+
+ udelay(MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS);
+ timeout_usecs -= MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS;
+ if (timeout_usecs == 0)
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+static ssize_t vfio_fsl_mc_write(void *device_data, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct vfio_fsl_mc_device *vdev = device_data;
+ unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
+ loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
+ struct fsl_mc_device *mc_dev = vdev->mc_dev;
+ struct vfio_fsl_mc_region *region;
+ u64 data[8];
+ int ret;
+
+ if (index >= mc_dev->obj_desc.region_count)
+ return -EINVAL;
+
+ region = &vdev->regions[index];
+
+ if (!(region->flags & VFIO_REGION_INFO_FLAG_WRITE))
+ return -EINVAL;
+
+ if (!region->ioaddr) {
+ region->ioaddr = ioremap(region->addr, region->size);
+ if (!region->ioaddr)
+ return -ENOMEM;
+ }
+
+ if (count != 64 || off != 0)
+ return -EINVAL;
+
+ if (copy_from_user(&data, buf, 64))
+ return -EFAULT;
+
+ ret = vfio_fsl_mc_send_command(region->ioaddr, data);
+ if (ret)
+ return ret;
+
+ return count;
+
+}
+
+static int vfio_fsl_mc_mmap_mmio(struct vfio_fsl_mc_region region,
+ struct vm_area_struct *vma)
+{
+ u64 size = vma->vm_end - vma->vm_start;
+ u64 pgoff, base;
+ u8 region_cacheable;
+
+ pgoff = vma->vm_pgoff &
+ ((1U << (VFIO_FSL_MC_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+ base = pgoff << PAGE_SHIFT;
+
+ if (region.size < PAGE_SIZE || base + size > region.size)
+ return -EINVAL;
+
+ region_cacheable = (region.type & FSL_MC_REGION_CACHEABLE) &&
+ (region.type & FSL_MC_REGION_SHAREABLE);
+ if (!region_cacheable)
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ vma->vm_pgoff = (region.addr >> PAGE_SHIFT) + pgoff;
+
+ return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+ size, vma->vm_page_prot);
+}
+
+static int vfio_fsl_mc_mmap(void *device_data, struct vm_area_struct *vma)
+{
+ struct vfio_fsl_mc_device *vdev = device_data;
+ struct fsl_mc_device *mc_dev = vdev->mc_dev;
+ int index;
+
+ index = vma->vm_pgoff >> (VFIO_FSL_MC_OFFSET_SHIFT - PAGE_SHIFT);
+
+ if (vma->vm_end < vma->vm_start)
+ return -EINVAL;
+ if (vma->vm_start & ~PAGE_MASK)
+ return -EINVAL;
+ if (vma->vm_end & ~PAGE_MASK)
+ return -EINVAL;
+ if (!(vma->vm_flags & VM_SHARED))
+ return -EINVAL;
+ if (index >= mc_dev->obj_desc.region_count)
+ return -EINVAL;
+
+ if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_MMAP))
+ return -EINVAL;
+
+ if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_READ)
+ && (vma->vm_flags & VM_READ))
+ return -EINVAL;
+
+ if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_WRITE)
+ && (vma->vm_flags & VM_WRITE))
+ return -EINVAL;
+
+ vma->vm_private_data = mc_dev;
+
+ return vfio_fsl_mc_mmap_mmio(vdev->regions[index], vma);
+}
+
+static const struct vfio_device_ops vfio_fsl_mc_ops = {
+ .name = "vfio-fsl-mc",
+ .open = vfio_fsl_mc_open,
+ .release = vfio_fsl_mc_release,
+ .ioctl = vfio_fsl_mc_ioctl,
+ .read = vfio_fsl_mc_read,
+ .write = vfio_fsl_mc_write,
+ .mmap = vfio_fsl_mc_mmap,
+};
+
+static int vfio_fsl_mc_bus_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct vfio_fsl_mc_device *vdev = container_of(nb,
+ struct vfio_fsl_mc_device, nb);
+ struct device *dev = data;
+ struct fsl_mc_device *mc_dev = to_fsl_mc_device(dev);
+ struct fsl_mc_device *mc_cont = to_fsl_mc_device(mc_dev->dev.parent);
+
+ if (action == BUS_NOTIFY_ADD_DEVICE &&
+ vdev->mc_dev == mc_cont) {
+ mc_dev->driver_override = kasprintf(GFP_KERNEL, "%s",
+ vfio_fsl_mc_ops.name);
+ if (!mc_dev->driver_override)
+ dev_warn(dev, "VFIO_FSL_MC: Setting driver override for device in dprc %s failed\n",
+ dev_name(&mc_cont->dev));
+ else
+ dev_info(dev, "VFIO_FSL_MC: Setting driver override for device in dprc %s\n",
+ dev_name(&mc_cont->dev));
+ } else if (action == BUS_NOTIFY_BOUND_DRIVER &&
+ vdev->mc_dev == mc_cont) {
+ struct fsl_mc_driver *mc_drv = to_fsl_mc_driver(dev->driver);
+
+ if (mc_drv && mc_drv != &vfio_fsl_mc_driver)
+ dev_warn(dev, "VFIO_FSL_MC: Object %s bound to driver %s while DPRC bound to vfio-fsl-mc\n",
+ dev_name(dev), mc_drv->driver.name);
+ }
+
+ return 0;
+}
+
+static int vfio_fsl_mc_init_device(struct vfio_fsl_mc_device *vdev)
+{
+ struct fsl_mc_device *mc_dev = vdev->mc_dev;
+ int ret;
+
+ /* Non-dprc devices share mc_io from parent */
+ if (!is_fsl_mc_bus_dprc(mc_dev)) {
+ struct fsl_mc_device *mc_cont = to_fsl_mc_device(mc_dev->dev.parent);
+
+ mc_dev->mc_io = mc_cont->mc_io;
+ return 0;
+ }
+
+ vdev->nb.notifier_call = vfio_fsl_mc_bus_notifier;
+ ret = bus_register_notifier(&fsl_mc_bus_type, &vdev->nb);
+ if (ret)
+ return ret;
+
+ /* open DPRC, allocate a MC portal */
+ ret = dprc_setup(mc_dev);
+ if (ret) {
+ dev_err(&mc_dev->dev, "VFIO_FSL_MC: Failed to setup DPRC (%d)\n", ret);
+ goto out_nc_unreg;
+ }
+
+ ret = dprc_scan_container(mc_dev, false);
+ if (ret) {
+ dev_err(&mc_dev->dev, "VFIO_FSL_MC: Container scanning failed (%d)\n", ret);
+ goto out_dprc_cleanup;
+ }
+
+ return 0;
+
+out_dprc_cleanup:
+ dprc_remove_devices(mc_dev, NULL, 0);
+ dprc_cleanup(mc_dev);
+out_nc_unreg:
+ bus_unregister_notifier(&fsl_mc_bus_type, &vdev->nb);
+ vdev->nb.notifier_call = NULL;
+
+ return ret;
+}
+
+static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
+{
+ struct iommu_group *group;
+ struct vfio_fsl_mc_device *vdev;
+ struct device *dev = &mc_dev->dev;
+ int ret;
+
+ group = vfio_iommu_group_get(dev);
+ if (!group) {
+ dev_err(dev, "VFIO_FSL_MC: No IOMMU group\n");
+ return -EINVAL;
+ }
+
+ vdev = devm_kzalloc(dev, sizeof(*vdev), GFP_KERNEL);
+ if (!vdev) {
+ ret = -ENOMEM;
+ goto out_group_put;
+ }
+
+ vdev->mc_dev = mc_dev;
+
+ ret = vfio_add_group_dev(dev, &vfio_fsl_mc_ops, vdev);
+ if (ret) {
+ dev_err(dev, "VFIO_FSL_MC: Failed to add to vfio group\n");
+ goto out_group_put;
+ }
+
+ ret = vfio_fsl_mc_reflck_attach(vdev);
+ if (ret)
+ goto out_group_dev;
+
+ ret = vfio_fsl_mc_init_device(vdev);
+ if (ret)
+ goto out_reflck;
+
+ mutex_init(&vdev->igate);
+
+ return 0;
+
+out_reflck:
+ vfio_fsl_mc_reflck_put(vdev->reflck);
+out_group_dev:
+ vfio_del_group_dev(dev);
+out_group_put:
+ vfio_iommu_group_put(group, dev);
+ return ret;
+}
+
+static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev)
+{
+ struct vfio_fsl_mc_device *vdev;
+ struct device *dev = &mc_dev->dev;
+
+ vdev = vfio_del_group_dev(dev);
+ if (!vdev)
+ return -EINVAL;
+
+ mutex_destroy(&vdev->igate);
+
+ vfio_fsl_mc_reflck_put(vdev->reflck);
+
+ if (is_fsl_mc_bus_dprc(mc_dev)) {
+ dprc_remove_devices(mc_dev, NULL, 0);
+ dprc_cleanup(mc_dev);
+ }
+
+ if (vdev->nb.notifier_call)
+ bus_unregister_notifier(&fsl_mc_bus_type, &vdev->nb);
+
+ vfio_iommu_group_put(mc_dev->dev.iommu_group, dev);
+
+ return 0;
+}
+
+static struct fsl_mc_driver vfio_fsl_mc_driver = {
+ .probe = vfio_fsl_mc_probe,
+ .remove = vfio_fsl_mc_remove,
+ .driver = {
+ .name = "vfio-fsl-mc",
+ .owner = THIS_MODULE,
+ },
+};
+
+static int __init vfio_fsl_mc_driver_init(void)
+{
+ return fsl_mc_driver_register(&vfio_fsl_mc_driver);
+}
+
+static void __exit vfio_fsl_mc_driver_exit(void)
+{
+ fsl_mc_driver_unregister(&vfio_fsl_mc_driver);
+}
+
+module_init(vfio_fsl_mc_driver_init);
+module_exit(vfio_fsl_mc_driver_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("VFIO for FSL-MC devices - User Level meta-driver");
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c
new file mode 100644
index 000000000000..c80dceb46f79
--- /dev/null
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/*
+ * Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2019 NXP
+ */
+
+#include <linux/vfio.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/eventfd.h>
+#include <linux/msi.h>
+
+#include "linux/fsl/mc.h"
+#include "vfio_fsl_mc_private.h"
+
+int vfio_fsl_mc_irqs_allocate(struct vfio_fsl_mc_device *vdev)
+{
+ struct fsl_mc_device *mc_dev = vdev->mc_dev;
+ struct vfio_fsl_mc_irq *mc_irq;
+ int irq_count;
+ int ret, i;
+
+ /* Device does not support any interrupt */
+ if (mc_dev->obj_desc.irq_count == 0)
+ return 0;
+
+ /* interrupts were already allocated for this device */
+ if (vdev->mc_irqs)
+ return 0;
+
+ irq_count = mc_dev->obj_desc.irq_count;
+
+ mc_irq = kcalloc(irq_count, sizeof(*mc_irq), GFP_KERNEL);
+ if (!mc_irq)
+ return -ENOMEM;
+
+ /* Allocate IRQs */
+ ret = fsl_mc_allocate_irqs(mc_dev);
+ if (ret) {
+ kfree(mc_irq);
+ return ret;
+ }
+
+ for (i = 0; i < irq_count; i++) {
+ mc_irq[i].count = 1;
+ mc_irq[i].flags = VFIO_IRQ_INFO_EVENTFD;
+ }
+
+ vdev->mc_irqs = mc_irq;
+
+ return 0;
+}
+
+static irqreturn_t vfio_fsl_mc_irq_handler(int irq_num, void *arg)
+{
+ struct vfio_fsl_mc_irq *mc_irq = (struct vfio_fsl_mc_irq *)arg;
+
+ eventfd_signal(mc_irq->trigger, 1);
+ return IRQ_HANDLED;
+}
+
+static int vfio_set_trigger(struct vfio_fsl_mc_device *vdev,
+ int index, int fd)
+{
+ struct vfio_fsl_mc_irq *irq = &vdev->mc_irqs[index];
+ struct eventfd_ctx *trigger;
+ int hwirq;
+ int ret;
+
+ hwirq = vdev->mc_dev->irqs[index]->msi_desc->irq;
+ if (irq->trigger) {
+ free_irq(hwirq, irq);
+ kfree(irq->name);
+ eventfd_ctx_put(irq->trigger);
+ irq->trigger = NULL;
+ }
+
+ if (fd < 0) /* Disable only */
+ return 0;
+
+ irq->name = kasprintf(GFP_KERNEL, "vfio-irq[%d](%s)",
+ hwirq, dev_name(&vdev->mc_dev->dev));
+ if (!irq->name)
+ return -ENOMEM;
+
+ trigger = eventfd_ctx_fdget(fd);
+ if (IS_ERR(trigger)) {
+ kfree(irq->name);
+ return PTR_ERR(trigger);
+ }
+
+ irq->trigger = trigger;
+
+ ret = request_irq(hwirq, vfio_fsl_mc_irq_handler, 0,
+ irq->name, irq);
+ if (ret) {
+ kfree(irq->name);
+ eventfd_ctx_put(trigger);
+ irq->trigger = NULL;
+ return ret;
+ }
+
+ return 0;
+}
+
+static int vfio_fsl_mc_set_irq_trigger(struct vfio_fsl_mc_device *vdev,
+ unsigned int index, unsigned int start,
+ unsigned int count, u32 flags,
+ void *data)
+{
+ struct fsl_mc_device *mc_dev = vdev->mc_dev;
+ int ret, hwirq;
+ struct vfio_fsl_mc_irq *irq;
+ struct device *cont_dev = fsl_mc_cont_dev(&mc_dev->dev);
+ struct fsl_mc_device *mc_cont = to_fsl_mc_device(cont_dev);
+
+ if (!count && (flags & VFIO_IRQ_SET_DATA_NONE))
+ return vfio_set_trigger(vdev, index, -1);
+
+ if (start != 0 || count != 1)
+ return -EINVAL;
+
+ mutex_lock(&vdev->reflck->lock);
+ ret = fsl_mc_populate_irq_pool(mc_cont,
+ FSL_MC_IRQ_POOL_MAX_TOTAL_IRQS);
+ if (ret)
+ goto unlock;
+
+ ret = vfio_fsl_mc_irqs_allocate(vdev);
+ if (ret)
+ goto unlock;
+ mutex_unlock(&vdev->reflck->lock);
+
+ if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+ s32 fd = *(s32 *)data;
+
+ return vfio_set_trigger(vdev, index, fd);
+ }
+
+ hwirq = vdev->mc_dev->irqs[index]->msi_desc->irq;
+
+ irq = &vdev->mc_irqs[index];
+
+ if (flags & VFIO_IRQ_SET_DATA_NONE) {
+ vfio_fsl_mc_irq_handler(hwirq, irq);
+
+ } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
+ u8 trigger = *(u8 *)data;
+
+ if (trigger)
+ vfio_fsl_mc_irq_handler(hwirq, irq);
+ }
+
+ return 0;
+
+unlock:
+ mutex_unlock(&vdev->reflck->lock);
+ return ret;
+
+}
+
+int vfio_fsl_mc_set_irqs_ioctl(struct vfio_fsl_mc_device *vdev,
+ u32 flags, unsigned int index,
+ unsigned int start, unsigned int count,
+ void *data)
+{
+ if (flags & VFIO_IRQ_SET_ACTION_TRIGGER)
+ return vfio_fsl_mc_set_irq_trigger(vdev, index, start,
+ count, flags, data);
+ else
+ return -EINVAL;
+}
+
+/* Free All IRQs for the given MC object */
+void vfio_fsl_mc_irqs_cleanup(struct vfio_fsl_mc_device *vdev)
+{
+ struct fsl_mc_device *mc_dev = vdev->mc_dev;
+ int irq_count = mc_dev->obj_desc.irq_count;
+ int i;
+
+ /*
+ * Device does not support any interrupt or the interrupts
+ * were not configured
+ */
+ if (!vdev->mc_irqs)
+ return;
+
+ for (i = 0; i < irq_count; i++)
+ vfio_set_trigger(vdev, i, -1);
+
+ fsl_mc_free_irqs(mc_dev);
+ kfree(vdev->mc_irqs);
+ vdev->mc_irqs = NULL;
+}
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
new file mode 100644
index 000000000000..a97ee691ed47
--- /dev/null
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2016,2019-2020 NXP
+ */
+
+#ifndef VFIO_FSL_MC_PRIVATE_H
+#define VFIO_FSL_MC_PRIVATE_H
+
+#define VFIO_FSL_MC_OFFSET_SHIFT 40
+#define VFIO_FSL_MC_OFFSET_MASK (((u64)(1) << VFIO_FSL_MC_OFFSET_SHIFT) - 1)
+
+#define VFIO_FSL_MC_OFFSET_TO_INDEX(off) ((off) >> VFIO_FSL_MC_OFFSET_SHIFT)
+
+#define VFIO_FSL_MC_INDEX_TO_OFFSET(index) \
+ ((u64)(index) << VFIO_FSL_MC_OFFSET_SHIFT)
+
+struct vfio_fsl_mc_irq {
+ u32 flags;
+ u32 count;
+ struct eventfd_ctx *trigger;
+ char *name;
+};
+
+struct vfio_fsl_mc_reflck {
+ struct kref kref;
+ struct mutex lock;
+};
+
+struct vfio_fsl_mc_region {
+ u32 flags;
+ u32 type;
+ u64 addr;
+ resource_size_t size;
+ void __iomem *ioaddr;
+};
+
+struct vfio_fsl_mc_device {
+ struct fsl_mc_device *mc_dev;
+ struct notifier_block nb;
+ int refcnt;
+ struct vfio_fsl_mc_region *regions;
+ struct vfio_fsl_mc_reflck *reflck;
+ struct mutex igate;
+ struct vfio_fsl_mc_irq *mc_irqs;
+};
+
+extern int vfio_fsl_mc_set_irqs_ioctl(struct vfio_fsl_mc_device *vdev,
+ u32 flags, unsigned int index,
+ unsigned int start, unsigned int count,
+ void *data);
+
+void vfio_fsl_mc_irqs_cleanup(struct vfio_fsl_mc_device *vdev);
+
+#endif /* VFIO_FSL_MC_PRIVATE_H */
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index ac3c1dd3edef..40a223381ab6 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -45,3 +45,15 @@ config VFIO_PCI_NVLINK2
depends on VFIO_PCI && PPC_POWERNV
help
VFIO PCI support for P9 Witherspoon machine with NVIDIA V100 GPUs
+
+config VFIO_PCI_ZDEV
+ bool "VFIO PCI ZPCI device CLP support"
+ depends on VFIO_PCI && S390
+ default y
+ help
+ Enabling this option exposes VFIO capabilities containing hardware
+ configuration for zPCI devices. This enables userspace (e.g. QEMU)
+ to supply proper configuration values instead of hard-coded defaults
+ for zPCI devices passed through via VFIO on s390.
+
+ Say Y here.
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index f027f8a0e89c..781e0809d6ee 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -3,5 +3,6 @@
vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
vfio-pci-$(CONFIG_VFIO_PCI_NVLINK2) += vfio_pci_nvlink2.o
+vfio-pci-$(CONFIG_VFIO_PCI_ZDEV) += vfio_pci_zdev.o
obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index b0f4b92a87ed..fbd2b3404184 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -807,15 +807,25 @@ static long vfio_pci_ioctl(void *device_data,
if (cmd == VFIO_DEVICE_GET_INFO) {
struct vfio_device_info info;
+ struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
+ unsigned long capsz;
minsz = offsetofend(struct vfio_device_info, num_irqs);
+ /* For backward compatibility, cannot require this */
+ capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset);
+
if (copy_from_user(&info, (void __user *)arg, minsz))
return -EFAULT;
if (info.argsz < minsz)
return -EINVAL;
+ if (info.argsz >= capsz) {
+ minsz = capsz;
+ info.cap_offset = 0;
+ }
+
info.flags = VFIO_DEVICE_FLAGS_PCI;
if (vdev->reset_works)
@@ -824,6 +834,33 @@ static long vfio_pci_ioctl(void *device_data,
info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
info.num_irqs = VFIO_PCI_NUM_IRQS;
+ if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV)) {
+ int ret = vfio_pci_info_zdev_add_caps(vdev, &caps);
+
+ if (ret && ret != -ENODEV) {
+ pci_warn(vdev->pdev, "Failed to setup zPCI info capabilities\n");
+ return ret;
+ }
+ }
+
+ if (caps.size) {
+ info.flags |= VFIO_DEVICE_FLAGS_CAPS;
+ if (info.argsz < sizeof(info) + caps.size) {
+ info.argsz = sizeof(info) + caps.size;
+ } else {
+ vfio_info_cap_shift(&caps, sizeof(info));
+ if (copy_to_user((void __user *)arg +
+ sizeof(info), caps.buf,
+ caps.size)) {
+ kfree(caps.buf);
+ return -EFAULT;
+ }
+ info.cap_offset = sizeof(info);
+ }
+
+ kfree(caps.buf);
+ }
+
return copy_to_user((void __user *)arg, &info, minsz) ?
-EFAULT : 0;
@@ -1860,7 +1897,6 @@ static const struct vfio_device_ops vfio_pci_ops = {
static int vfio_pci_reflck_attach(struct vfio_pci_device *vdev);
static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck);
-static struct pci_driver vfio_pci_driver;
static int vfio_pci_bus_notifier(struct notifier_block *nb,
unsigned long action, void *data)
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index d98843feddce..a402adee8a21 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -406,7 +406,7 @@ bool __vfio_pci_memory_enabled(struct vfio_pci_device *vdev)
* PF SR-IOV capability, there's therefore no need to trigger
* faults based on the virtual value.
*/
- return pdev->is_virtfn || (cmd & PCI_COMMAND_MEMORY);
+ return pdev->no_command_memory || (cmd & PCI_COMMAND_MEMORY);
}
/*
@@ -467,6 +467,9 @@ static void vfio_bar_fixup(struct vfio_pci_device *vdev)
__le32 *vbar;
u64 mask;
+ if (!vdev->bardirty)
+ return;
+
vbar = (__le32 *)&vdev->vconfig[PCI_BASE_ADDRESS_0];
for (i = 0; i < PCI_STD_NUM_BARS; i++, vbar++) {
@@ -520,8 +523,8 @@ static int vfio_basic_config_read(struct vfio_pci_device *vdev, int pos,
count = vfio_default_config_read(vdev, pos, count, perm, offset, val);
- /* Mask in virtual memory enable for SR-IOV devices */
- if (offset == PCI_COMMAND && vdev->pdev->is_virtfn) {
+ /* Mask in virtual memory enable */
+ if (offset == PCI_COMMAND && vdev->pdev->no_command_memory) {
u16 cmd = le16_to_cpu(*(__le16 *)&vdev->vconfig[PCI_COMMAND]);
u32 tmp_val = le32_to_cpu(*val);
@@ -589,9 +592,11 @@ static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos,
* shows it disabled (phys_mem/io, then the device has
* undergone some kind of backdoor reset and needs to be
* restored before we allow it to enable the bars.
- * SR-IOV devices will trigger this, but we catch them later
+ * SR-IOV devices will trigger this - for mem enable let's
+ * catch this now and for io enable it will be caught later
*/
- if ((new_mem && virt_mem && !phys_mem) ||
+ if ((new_mem && virt_mem && !phys_mem &&
+ !pdev->no_command_memory) ||
(new_io && virt_io && !phys_io) ||
vfio_need_bar_restore(vdev))
vfio_bar_restore(vdev);
@@ -1734,12 +1739,14 @@ int vfio_config_init(struct vfio_pci_device *vdev)
vconfig[PCI_INTERRUPT_PIN]);
vconfig[PCI_INTERRUPT_PIN] = 0; /* Gratuitous for good VFs */
-
+ }
+ if (pdev->no_command_memory) {
/*
- * VFs do no implement the memory enable bit of the COMMAND
- * register therefore we'll not have it set in our initial
- * copy of config space after pci_enable_device(). For
- * consistency with PFs, set the virtual enable bit here.
+ * VFs and devices that set pdev->no_command_memory do not
+ * implement the memory enable bit of the COMMAND register
+ * therefore we'll not have it set in our initial copy of
+ * config space after pci_enable_device(). For consistency
+ * with PFs, set the virtual enable bit here.
*/
*(__le16 *)&vconfig[PCI_COMMAND] |=
cpu_to_le16(PCI_COMMAND_MEMORY);
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 1d9fb2592945..869dce5f134d 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -352,11 +352,13 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
vdev->ctx[vector].producer.token = trigger;
vdev->ctx[vector].producer.irq = irq;
ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
- if (unlikely(ret))
+ if (unlikely(ret)) {
dev_info(&pdev->dev,
"irq bypass producer (token %p) registration fails: %d\n",
vdev->ctx[vector].producer.token, ret);
+ vdev->ctx[vector].producer.token = NULL;
+ }
vdev->ctx[vector].trigger = trigger;
return 0;
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 61ca8ab165dc..5c90e560c5c7 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -213,4 +213,16 @@ static inline int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
return -ENODEV;
}
#endif
+
+#ifdef CONFIG_VFIO_PCI_ZDEV
+extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,
+ struct vfio_info_cap *caps);
+#else
+static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,
+ struct vfio_info_cap *caps)
+{
+ return -ENODEV;
+}
+#endif
+
#endif /* VFIO_PCI_PRIVATE_H */
diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c
new file mode 100644
index 000000000000..229685634031
--- /dev/null
+++ b/drivers/vfio/pci/vfio_pci_zdev.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * VFIO ZPCI devices support
+ *
+ * Copyright (C) IBM Corp. 2020. All rights reserved.
+ * Author(s): Pierre Morel <pmorel@linux.ibm.com>
+ * Matthew Rosato <mjrosato@linux.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/io.h>
+#include <linux/pci.h>
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+#include <linux/vfio_zdev.h>
+#include <asm/pci_clp.h>
+#include <asm/pci_io.h>
+
+#include "vfio_pci_private.h"
+
+/*
+ * Add the Base PCI Function information to the device info region.
+ */
+static int zpci_base_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
+ struct vfio_info_cap *caps)
+{
+ struct vfio_device_info_cap_zpci_base cap = {
+ .header.id = VFIO_DEVICE_INFO_CAP_ZPCI_BASE,
+ .header.version = 1,
+ .start_dma = zdev->start_dma,
+ .end_dma = zdev->end_dma,
+ .pchid = zdev->pchid,
+ .vfn = zdev->vfn,
+ .fmb_length = zdev->fmb_length,
+ .pft = zdev->pft,
+ .gid = zdev->pfgid
+ };
+
+ return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
+}
+
+/*
+ * Add the Base PCI Function Group information to the device info region.
+ */
+static int zpci_group_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
+ struct vfio_info_cap *caps)
+{
+ struct vfio_device_info_cap_zpci_group cap = {
+ .header.id = VFIO_DEVICE_INFO_CAP_ZPCI_GROUP,
+ .header.version = 1,
+ .dasm = zdev->dma_mask,
+ .msi_addr = zdev->msi_addr,
+ .flags = VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH,
+ .mui = zdev->fmb_update,
+ .noi = zdev->max_msi,
+ .maxstbl = ZPCI_MAX_WRITE_SIZE,
+ .version = zdev->version
+ };
+
+ return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
+}
+
+/*
+ * Add the device utility string to the device info region.
+ */
+static int zpci_util_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
+ struct vfio_info_cap *caps)
+{
+ struct vfio_device_info_cap_zpci_util *cap;
+ int cap_size = sizeof(*cap) + CLP_UTIL_STR_LEN;
+ int ret;
+
+ cap = kmalloc(cap_size, GFP_KERNEL);
+
+ cap->header.id = VFIO_DEVICE_INFO_CAP_ZPCI_UTIL;
+ cap->header.version = 1;
+ cap->size = CLP_UTIL_STR_LEN;
+ memcpy(cap->util_str, zdev->util_str, cap->size);
+
+ ret = vfio_info_add_capability(caps, &cap->header, cap_size);
+
+ kfree(cap);
+
+ return ret;
+}
+
+/*
+ * Add the function path string to the device info region.
+ */
+static int zpci_pfip_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
+ struct vfio_info_cap *caps)
+{
+ struct vfio_device_info_cap_zpci_pfip *cap;
+ int cap_size = sizeof(*cap) + CLP_PFIP_NR_SEGMENTS;
+ int ret;
+
+ cap = kmalloc(cap_size, GFP_KERNEL);
+
+ cap->header.id = VFIO_DEVICE_INFO_CAP_ZPCI_PFIP;
+ cap->header.version = 1;
+ cap->size = CLP_PFIP_NR_SEGMENTS;
+ memcpy(cap->pfip, zdev->pfip, cap->size);
+
+ ret = vfio_info_add_capability(caps, &cap->header, cap_size);
+
+ kfree(cap);
+
+ return ret;
+}
+
+/*
+ * Add all supported capabilities to the VFIO_DEVICE_GET_INFO capability chain.
+ */
+int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,
+ struct vfio_info_cap *caps)
+{
+ struct zpci_dev *zdev = to_zpci(vdev->pdev);
+ int ret;
+
+ if (!zdev)
+ return -ENODEV;
+
+ ret = zpci_base_cap(zdev, vdev, caps);
+ if (ret)
+ return ret;
+
+ ret = zpci_group_cap(zdev, vdev, caps);
+ if (ret)
+ return ret;
+
+ if (zdev->util_str_avail) {
+ ret = zpci_util_cap(zdev, vdev, caps);
+ if (ret)
+ return ret;
+ }
+
+ ret = zpci_pfip_cap(zdev, vdev, caps);
+
+ return ret;
+}
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 262ab0efd06c..2151bc7f87ab 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1949,8 +1949,10 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
if (!group)
return -ENODEV;
- if (group->dev_counter > 1)
- return -EINVAL;
+ if (group->dev_counter > 1) {
+ ret = -EINVAL;
+ goto err_pin_pages;
+ }
ret = vfio_group_add_container_user(group);
if (ret)
@@ -2051,6 +2053,9 @@ int vfio_group_pin_pages(struct vfio_group *group,
if (!group || !user_iova_pfn || !phys_pfn || !npage)
return -EINVAL;
+ if (group->dev_counter > 1)
+ return -EINVAL;
+
if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
return -E2BIG;
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index c255a6683f31..bb2684cc245e 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -693,7 +693,8 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
ret = vfio_add_to_pfn_list(dma, iova, phys_pfn[i]);
if (ret) {
- vfio_unpin_page_external(dma, iova, do_accounting);
+ if (put_pfn(phys_pfn[i], dma->prot) && do_accounting)
+ vfio_lock_acct(dma, -1, true);
goto pin_unwind;
}
@@ -2609,6 +2610,20 @@ static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu,
return vfio_info_add_capability(caps, &cap_mig.header, sizeof(cap_mig));
}
+static int vfio_iommu_dma_avail_build_caps(struct vfio_iommu *iommu,
+ struct vfio_info_cap *caps)
+{
+ struct vfio_iommu_type1_info_dma_avail cap_dma_avail;
+
+ cap_dma_avail.header.id = VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL;
+ cap_dma_avail.header.version = 1;
+
+ cap_dma_avail.avail = iommu->dma_avail;
+
+ return vfio_info_add_capability(caps, &cap_dma_avail.header,
+ sizeof(cap_dma_avail));
+}
+
static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
unsigned long arg)
{
@@ -2642,6 +2657,9 @@ static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
ret = vfio_iommu_migration_build_caps(iommu, &caps);
if (!ret)
+ ret = vfio_iommu_dma_avail_build_caps(iommu, &caps);
+
+ if (!ret)
ret = vfio_iommu_iova_build_caps(iommu, &caps);
mutex_unlock(&iommu->lock);
@@ -2933,7 +2951,8 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
* size
*/
bitmap_set(dma->bitmap, offset >> pgshift,
- *copied >> pgshift);
+ ((offset + *copied - 1) >> pgshift) -
+ (offset >> pgshift) + 1);
}
} else
*copied = copy_from_user(data, (void __user *)vaddr,
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 32cd2b550736..22207a79762c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -446,6 +446,7 @@ struct pci_dev {
unsigned int is_probed:1; /* Device probing in progress */
unsigned int link_active_reporting:1;/* Device capable of reporting link active */
unsigned int no_vf_scan:1; /* Don't scan for VFs after IOV enablement */
+ unsigned int no_command_memory:1; /* No PCI_COMMAND_MEMORY */
pci_dev_flags_t dev_flags;
atomic_t enable_cnt; /* pci_enable_device has been called */
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 920470502329..2f313a238a8f 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -201,8 +201,11 @@ struct vfio_device_info {
#define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */
#define VFIO_DEVICE_FLAGS_CCW (1 << 4) /* vfio-ccw device */
#define VFIO_DEVICE_FLAGS_AP (1 << 5) /* vfio-ap device */
+#define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6) /* vfio-fsl-mc device */
+#define VFIO_DEVICE_FLAGS_CAPS (1 << 7) /* Info supports caps */
__u32 num_regions; /* Max region index + 1 */
__u32 num_irqs; /* Max IRQ index + 1 */
+ __u32 cap_offset; /* Offset within info struct of first cap */
};
#define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7)
@@ -218,6 +221,15 @@ struct vfio_device_info {
#define VFIO_DEVICE_API_CCW_STRING "vfio-ccw"
#define VFIO_DEVICE_API_AP_STRING "vfio-ap"
+/*
+ * The following capabilities are unique to s390 zPCI devices. Their contents
+ * are further-defined in vfio_zdev.h
+ */
+#define VFIO_DEVICE_INFO_CAP_ZPCI_BASE 1
+#define VFIO_DEVICE_INFO_CAP_ZPCI_GROUP 2
+#define VFIO_DEVICE_INFO_CAP_ZPCI_UTIL 3
+#define VFIO_DEVICE_INFO_CAP_ZPCI_PFIP 4
+
/**
* VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
* struct vfio_region_info)
@@ -462,7 +474,7 @@ struct vfio_region_gfx_edid {
* 5. Resumed
* |--------->|
*
- * 0. Default state of VFIO device is _RUNNNG when the user application starts.
+ * 0. Default state of VFIO device is _RUNNING when the user application starts.
* 1. During normal shutdown of the user application, the user application may
* optionally change the VFIO device state from _RUNNING to _STOP. This
* transition is optional. The vendor driver must support this transition but
@@ -1039,6 +1051,21 @@ struct vfio_iommu_type1_info_cap_migration {
__u64 max_dirty_bitmap_size; /* in bytes */
};
+/*
+ * The DMA available capability allows to report the current number of
+ * simultaneously outstanding DMA mappings that are allowed.
+ *
+ * The structure below defines version 1 of this capability.
+ *
+ * avail: specifies the current number of outstanding DMA mappings allowed.
+ */
+#define VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL 3
+
+struct vfio_iommu_type1_info_dma_avail {
+ struct vfio_info_cap_header header;
+ __u32 avail;
+};
+
#define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
/**
diff --git a/include/uapi/linux/vfio_zdev.h b/include/uapi/linux/vfio_zdev.h
new file mode 100644
index 000000000000..b4309397b6b2
--- /dev/null
+++ b/include/uapi/linux/vfio_zdev.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * VFIO Region definitions for ZPCI devices
+ *
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s): Pierre Morel <pmorel@linux.ibm.com>
+ * Matthew Rosato <mjrosato@linux.ibm.com>
+ */
+
+#ifndef _VFIO_ZDEV_H_
+#define _VFIO_ZDEV_H_
+
+#include <linux/types.h>
+#include <linux/vfio.h>
+
+/**
+ * VFIO_DEVICE_INFO_CAP_ZPCI_BASE - Base PCI Function information
+ *
+ * This capability provides a set of descriptive information about the
+ * associated PCI function.
+ */
+struct vfio_device_info_cap_zpci_base {
+ struct vfio_info_cap_header header;
+ __u64 start_dma; /* Start of available DMA addresses */
+ __u64 end_dma; /* End of available DMA addresses */
+ __u16 pchid; /* Physical Channel ID */
+ __u16 vfn; /* Virtual function number */
+ __u16 fmb_length; /* Measurement Block Length (in bytes) */
+ __u8 pft; /* PCI Function Type */
+ __u8 gid; /* PCI function group ID */
+};
+
+/**
+ * VFIO_DEVICE_INFO_CAP_ZPCI_GROUP - Base PCI Function Group information
+ *
+ * This capability provides a set of descriptive information about the group of
+ * PCI functions that the associated device belongs to.
+ */
+struct vfio_device_info_cap_zpci_group {
+ struct vfio_info_cap_header header;
+ __u64 dasm; /* DMA Address space mask */
+ __u64 msi_addr; /* MSI address */
+ __u64 flags;
+#define VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH 1 /* Program-specified TLB refresh */
+ __u16 mui; /* Measurement Block Update Interval */
+ __u16 noi; /* Maximum number of MSIs */
+ __u16 maxstbl; /* Maximum Store Block Length */
+ __u8 version; /* Supported PCI Version */
+};
+
+/**
+ * VFIO_DEVICE_INFO_CAP_ZPCI_UTIL - Utility String
+ *
+ * This capability provides the utility string for the associated device, which
+ * is a device identifier string made up of EBCDID characters. 'size' specifies
+ * the length of 'util_str'.
+ */
+struct vfio_device_info_cap_zpci_util {
+ struct vfio_info_cap_header header;
+ __u32 size;
+ __u8 util_str[];
+};
+
+/**
+ * VFIO_DEVICE_INFO_CAP_ZPCI_PFIP - PCI Function Path
+ *
+ * This capability provides the PCI function path string, which is an identifier
+ * that describes the internal hardware path of the device. 'size' specifies
+ * the length of 'pfip'.
+ */
+struct vfio_device_info_cap_zpci_pfip {
+ struct vfio_info_cap_header header;
+ __u32 size;
+ __u8 pfip[];
+};
+
+#endif