summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-10-31 11:01:38 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-10-31 11:01:38 -0700
commit0c86e761b95131943c2b8af2ffb3c0554f9a71f5 (patch)
tree95f9563ca78ab077b5fbe53618d1fb30bfd28f94
parentb3491d8430dd25f0a4e00c33d60da22a9bd9d052 (diff)
parent104c7405a64d937254b6a154938e6151f91c9e0d (diff)
downloadlwn-0c86e761b95131943c2b8af2ffb3c0554f9a71f5.tar.gz
lwn-0c86e761b95131943c2b8af2ffb3c0554f9a71f5.zip
Merge tag 'vfio-v4.20-rc1.v2' of git://github.com/awilliam/linux-vfio
Pull VFIO updates from Alex Williamson: - EDID interfaces for vfio devices supporting display extensions (Gerd Hoffmann) - Generically select Type-1 IOMMU model support on ARM/ARM64 (Geert Uytterhoeven) - Quirk for VFs reporting INTx pin (Alex Williamson) - Fix error path memory leak in MSI support (Li Qiang) * tag 'vfio-v4.20-rc1.v2' of git://github.com/awilliam/linux-vfio: vfio: add edid support to mbochs sample driver vfio: add edid api for display (vgpu) devices. drivers/vfio: Allow type-1 IOMMU instantiation with all ARM/ARM64 IOMMUs vfio/pci: Mask buggy SR-IOV VF INTx support vfio/pci: Fix potential memory leak in vfio_msi_cap_len
-rw-r--r--drivers/vfio/Kconfig2
-rw-r--r--drivers/vfio/pci/vfio_pci.c8
-rw-r--r--drivers/vfio/pci/vfio_pci_config.c31
-rw-r--r--include/uapi/linux/vfio.h50
-rw-r--r--samples/vfio-mdev/mbochs.c136
5 files changed, 204 insertions, 23 deletions
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index c84333eb5eb5..9de5ed38da83 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -21,7 +21,7 @@ config VFIO_VIRQFD
menuconfig VFIO
tristate "VFIO Non-Privileged userspace driver framework"
depends on IOMMU_API
- select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM_SMMU || ARM_SMMU_V3)
+ select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM || ARM64)
select ANON_INODES
help
VFIO provides a framework for secure userspace device drivers.
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index cddb453a1ba5..50cdedfca9fe 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -434,10 +434,14 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
{
if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
u8 pin;
+
+ if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) ||
+ vdev->nointx || vdev->pdev->is_virtfn)
+ return 0;
+
pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin);
- if (IS_ENABLED(CONFIG_VFIO_PCI_INTX) && !vdev->nointx && pin)
- return 1;
+ return pin ? 1 : 0;
} else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) {
u8 pos;
u16 flags;
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index 115a36f6f403..423ea1f98441 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -1180,8 +1180,10 @@ static int vfio_msi_cap_len(struct vfio_pci_device *vdev, u8 pos)
return -ENOMEM;
ret = init_pci_cap_msi_perm(vdev->msi_perm, len, flags);
- if (ret)
+ if (ret) {
+ kfree(vdev->msi_perm);
return ret;
+ }
return len;
}
@@ -1610,6 +1612,15 @@ static int vfio_ecap_init(struct vfio_pci_device *vdev)
}
/*
+ * Nag about hardware bugs, hopefully to have vendors fix them, but at least
+ * to collect a list of dependencies for the VF INTx pin quirk below.
+ */
+static const struct pci_device_id known_bogus_vf_intx_pin[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x270c) },
+ {}
+};
+
+/*
* For each device we allocate a pci_config_map that indicates the
* capability occupying each dword and thus the struct perm_bits we
* use for read and write. We also allocate a virtualized config
@@ -1674,6 +1685,24 @@ int vfio_config_init(struct vfio_pci_device *vdev)
if (pdev->is_virtfn) {
*(__le16 *)&vconfig[PCI_VENDOR_ID] = cpu_to_le16(pdev->vendor);
*(__le16 *)&vconfig[PCI_DEVICE_ID] = cpu_to_le16(pdev->device);
+
+ /*
+ * Per SR-IOV spec rev 1.1, 3.4.1.18 the interrupt pin register
+ * does not apply to VFs and VFs must implement this register
+ * as read-only with value zero. Userspace is not readily able
+ * to identify whether a device is a VF and thus that the pin
+ * definition on the device is bogus should it violate this
+ * requirement. We already virtualize the pin register for
+ * other purposes, so we simply need to replace the bogus value
+ * and consider VFs when we determine INTx IRQ count.
+ */
+ if (vconfig[PCI_INTERRUPT_PIN] &&
+ !pci_match_id(known_bogus_vf_intx_pin, pdev))
+ pci_warn(pdev,
+ "Hardware bug: VF reports bogus INTx pin %d\n",
+ vconfig[PCI_INTERRUPT_PIN]);
+
+ vconfig[PCI_INTERRUPT_PIN] = 0; /* Gratuitous for good VFs */
}
if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || vdev->nointx)
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index f378b9802d8b..813102810f53 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -303,6 +303,56 @@ struct vfio_region_info_cap_type {
#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3)
+#define VFIO_REGION_TYPE_GFX (1)
+#define VFIO_REGION_SUBTYPE_GFX_EDID (1)
+
+/**
+ * struct vfio_region_gfx_edid - EDID region layout.
+ *
+ * Set display link state and EDID blob.
+ *
+ * The EDID blob has monitor information such as brand, name, serial
+ * number, physical size, supported video modes and more.
+ *
+ * This special region allows userspace (typically qemu) set a virtual
+ * EDID for the virtual monitor, which allows a flexible display
+ * configuration.
+ *
+ * For the edid blob spec look here:
+ * https://en.wikipedia.org/wiki/Extended_Display_Identification_Data
+ *
+ * On linux systems you can find the EDID blob in sysfs:
+ * /sys/class/drm/${card}/${connector}/edid
+ *
+ * You can use the edid-decode ulility (comes with xorg-x11-utils) to
+ * decode the EDID blob.
+ *
+ * @edid_offset: location of the edid blob, relative to the
+ * start of the region (readonly).
+ * @edid_max_size: max size of the edid blob (readonly).
+ * @edid_size: actual edid size (read/write).
+ * @link_state: display link state (read/write).
+ * VFIO_DEVICE_GFX_LINK_STATE_UP: Monitor is turned on.
+ * VFIO_DEVICE_GFX_LINK_STATE_DOWN: Monitor is turned off.
+ * @max_xres: max display width (0 == no limitation, readonly).
+ * @max_yres: max display height (0 == no limitation, readonly).
+ *
+ * EDID update protocol:
+ * (1) set link-state to down.
+ * (2) update edid blob and size.
+ * (3) set link-state to up.
+ */
+struct vfio_region_gfx_edid {
+ __u32 edid_offset;
+ __u32 edid_max_size;
+ __u32 edid_size;
+ __u32 max_xres;
+ __u32 max_yres;
+ __u32 link_state;
+#define VFIO_DEVICE_GFX_LINK_STATE_UP 1
+#define VFIO_DEVICE_GFX_LINK_STATE_DOWN 2
+};
+
/*
* The MSIX mappable capability informs that MSIX data of a BAR can be mmapped
* which allows direct access to non-MSIX registers which happened to be within
diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c
index 2535c3677c7b..ca7960adf5a3 100644
--- a/samples/vfio-mdev/mbochs.c
+++ b/samples/vfio-mdev/mbochs.c
@@ -71,11 +71,19 @@
#define MBOCHS_NAME "mbochs"
#define MBOCHS_CLASS_NAME "mbochs"
+#define MBOCHS_EDID_REGION_INDEX VFIO_PCI_NUM_REGIONS
+#define MBOCHS_NUM_REGIONS (MBOCHS_EDID_REGION_INDEX+1)
+
#define MBOCHS_CONFIG_SPACE_SIZE 0xff
#define MBOCHS_MMIO_BAR_OFFSET PAGE_SIZE
#define MBOCHS_MMIO_BAR_SIZE PAGE_SIZE
-#define MBOCHS_MEMORY_BAR_OFFSET (MBOCHS_MMIO_BAR_OFFSET + \
+#define MBOCHS_EDID_OFFSET (MBOCHS_MMIO_BAR_OFFSET + \
MBOCHS_MMIO_BAR_SIZE)
+#define MBOCHS_EDID_SIZE PAGE_SIZE
+#define MBOCHS_MEMORY_BAR_OFFSET (MBOCHS_EDID_OFFSET + \
+ MBOCHS_EDID_SIZE)
+
+#define MBOCHS_EDID_BLOB_OFFSET (MBOCHS_EDID_SIZE/2)
#define STORE_LE16(addr, val) (*(u16 *)addr = val)
#define STORE_LE32(addr, val) (*(u32 *)addr = val)
@@ -95,16 +103,24 @@ MODULE_PARM_DESC(mem, "megabytes available to " MBOCHS_NAME " devices");
static const struct mbochs_type {
const char *name;
u32 mbytes;
+ u32 max_x;
+ u32 max_y;
} mbochs_types[] = {
{
.name = MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_1,
.mbytes = 4,
+ .max_x = 800,
+ .max_y = 600,
}, {
.name = MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_2,
.mbytes = 16,
+ .max_x = 1920,
+ .max_y = 1440,
}, {
.name = MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_3,
.mbytes = 64,
+ .max_x = 0,
+ .max_y = 0,
},
};
@@ -115,6 +131,11 @@ static struct cdev mbochs_cdev;
static struct device mbochs_dev;
static int mbochs_used_mbytes;
+struct vfio_region_info_ext {
+ struct vfio_region_info base;
+ struct vfio_region_info_cap_type type;
+};
+
struct mbochs_mode {
u32 drm_format;
u32 bytepp;
@@ -144,13 +165,14 @@ struct mdev_state {
u32 memory_bar_mask;
struct mutex ops_lock;
struct mdev_device *mdev;
- struct vfio_device_info dev_info;
const struct mbochs_type *type;
u16 vbe[VBE_DISPI_INDEX_COUNT];
u64 memsize;
struct page **pages;
pgoff_t pagecount;
+ struct vfio_region_gfx_edid edid_regs;
+ u8 edid_blob[0x400];
struct list_head dmabufs;
u32 active_id;
@@ -342,10 +364,20 @@ static void handle_mmio_read(struct mdev_state *mdev_state, u16 offset,
char *buf, u32 count)
{
struct device *dev = mdev_dev(mdev_state->mdev);
+ struct vfio_region_gfx_edid *edid;
u16 reg16 = 0;
int index;
switch (offset) {
+ case 0x000 ... 0x3ff: /* edid block */
+ edid = &mdev_state->edid_regs;
+ if (edid->link_state != VFIO_DEVICE_GFX_LINK_STATE_UP ||
+ offset >= edid->edid_size) {
+ memset(buf, 0, count);
+ break;
+ }
+ memcpy(buf, mdev_state->edid_blob + offset, count);
+ break;
case 0x500 ... 0x515: /* bochs dispi interface */
if (count != 2)
goto unhandled;
@@ -365,6 +397,44 @@ unhandled:
}
}
+static void handle_edid_regs(struct mdev_state *mdev_state, u16 offset,
+ char *buf, u32 count, bool is_write)
+{
+ char *regs = (void *)&mdev_state->edid_regs;
+
+ if (offset + count > sizeof(mdev_state->edid_regs))
+ return;
+ if (count != 4)
+ return;
+ if (offset % 4)
+ return;
+
+ if (is_write) {
+ switch (offset) {
+ case offsetof(struct vfio_region_gfx_edid, link_state):
+ case offsetof(struct vfio_region_gfx_edid, edid_size):
+ memcpy(regs + offset, buf, count);
+ break;
+ default:
+ /* read-only regs */
+ break;
+ }
+ } else {
+ memcpy(buf, regs + offset, count);
+ }
+}
+
+static void handle_edid_blob(struct mdev_state *mdev_state, u16 offset,
+ char *buf, u32 count, bool is_write)
+{
+ if (offset + count > mdev_state->edid_regs.edid_max_size)
+ return;
+ if (is_write)
+ memcpy(mdev_state->edid_blob + offset, buf, count);
+ else
+ memcpy(buf, mdev_state->edid_blob + offset, count);
+}
+
static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count,
loff_t pos, bool is_write)
{
@@ -384,13 +454,25 @@ static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count,
memcpy(buf, (mdev_state->vconfig + pos), count);
} else if (pos >= MBOCHS_MMIO_BAR_OFFSET &&
- pos + count <= MBOCHS_MEMORY_BAR_OFFSET) {
+ pos + count <= (MBOCHS_MMIO_BAR_OFFSET +
+ MBOCHS_MMIO_BAR_SIZE)) {
pos -= MBOCHS_MMIO_BAR_OFFSET;
if (is_write)
handle_mmio_write(mdev_state, pos, buf, count);
else
handle_mmio_read(mdev_state, pos, buf, count);
+ } else if (pos >= MBOCHS_EDID_OFFSET &&
+ pos + count <= (MBOCHS_EDID_OFFSET +
+ MBOCHS_EDID_SIZE)) {
+ pos -= MBOCHS_EDID_OFFSET;
+ if (pos < MBOCHS_EDID_BLOB_OFFSET) {
+ handle_edid_regs(mdev_state, pos, buf, count, is_write);
+ } else {
+ pos -= MBOCHS_EDID_BLOB_OFFSET;
+ handle_edid_blob(mdev_state, pos, buf, count, is_write);
+ }
+
} else if (pos >= MBOCHS_MEMORY_BAR_OFFSET &&
pos + count <=
MBOCHS_MEMORY_BAR_OFFSET + mdev_state->memsize) {
@@ -471,6 +553,10 @@ static int mbochs_create(struct kobject *kobj, struct mdev_device *mdev)
mdev_state->next_id = 1;
mdev_state->type = type;
+ mdev_state->edid_regs.max_xres = type->max_x;
+ mdev_state->edid_regs.max_yres = type->max_y;
+ mdev_state->edid_regs.edid_offset = MBOCHS_EDID_BLOB_OFFSET;
+ mdev_state->edid_regs.edid_max_size = sizeof(mdev_state->edid_blob);
mbochs_create_config_space(mdev_state);
mbochs_reset(mdev);
@@ -932,16 +1018,16 @@ static int mbochs_dmabuf_export(struct mbochs_dmabuf *dmabuf)
}
static int mbochs_get_region_info(struct mdev_device *mdev,
- struct vfio_region_info *region_info,
- u16 *cap_type_id, void **cap_type)
+ struct vfio_region_info_ext *ext)
{
+ struct vfio_region_info *region_info = &ext->base;
struct mdev_state *mdev_state;
mdev_state = mdev_get_drvdata(mdev);
if (!mdev_state)
return -EINVAL;
- if (region_info->index >= VFIO_PCI_NUM_REGIONS)
+ if (region_info->index >= MBOCHS_NUM_REGIONS)
return -EINVAL;
switch (region_info->index) {
@@ -964,6 +1050,20 @@ static int mbochs_get_region_info(struct mdev_device *mdev,
region_info->flags = (VFIO_REGION_INFO_FLAG_READ |
VFIO_REGION_INFO_FLAG_WRITE);
break;
+ case MBOCHS_EDID_REGION_INDEX:
+ ext->base.argsz = sizeof(*ext);
+ ext->base.offset = MBOCHS_EDID_OFFSET;
+ ext->base.size = MBOCHS_EDID_SIZE;
+ ext->base.flags = (VFIO_REGION_INFO_FLAG_READ |
+ VFIO_REGION_INFO_FLAG_WRITE |
+ VFIO_REGION_INFO_FLAG_CAPS);
+ ext->base.cap_offset = offsetof(typeof(*ext), type);
+ ext->type.header.id = VFIO_REGION_INFO_CAP_TYPE;
+ ext->type.header.version = 1;
+ ext->type.header.next = 0;
+ ext->type.type = VFIO_REGION_TYPE_GFX;
+ ext->type.subtype = VFIO_REGION_SUBTYPE_GFX_EDID;
+ break;
default:
region_info->size = 0;
region_info->offset = 0;
@@ -984,7 +1084,7 @@ static int mbochs_get_device_info(struct mdev_device *mdev,
struct vfio_device_info *dev_info)
{
dev_info->flags = VFIO_DEVICE_FLAGS_PCI;
- dev_info->num_regions = VFIO_PCI_NUM_REGIONS;
+ dev_info->num_regions = MBOCHS_NUM_REGIONS;
dev_info->num_irqs = VFIO_PCI_NUM_IRQS;
return 0;
}
@@ -1084,7 +1184,7 @@ static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd,
unsigned long arg)
{
int ret = 0;
- unsigned long minsz;
+ unsigned long minsz, outsz;
struct mdev_state *mdev_state;
mdev_state = mdev_get_drvdata(mdev);
@@ -1106,8 +1206,6 @@ static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd,
if (ret)
return ret;
- memcpy(&mdev_state->dev_info, &info, sizeof(info));
-
if (copy_to_user((void __user *)arg, &info, minsz))
return -EFAULT;
@@ -1115,24 +1213,24 @@ static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd,
}
case VFIO_DEVICE_GET_REGION_INFO:
{
- struct vfio_region_info info;
- u16 cap_type_id = 0;
- void *cap_type = NULL;
+ struct vfio_region_info_ext info;
- minsz = offsetofend(struct vfio_region_info, offset);
+ minsz = offsetofend(typeof(info), base.offset);
if (copy_from_user(&info, (void __user *)arg, minsz))
return -EFAULT;
- if (info.argsz < minsz)
+ outsz = info.base.argsz;
+ if (outsz < minsz)
+ return -EINVAL;
+ if (outsz > sizeof(info))
return -EINVAL;
- ret = mbochs_get_region_info(mdev, &info, &cap_type_id,
- &cap_type);
+ ret = mbochs_get_region_info(mdev, &info);
if (ret)
return ret;
- if (copy_to_user((void __user *)arg, &info, minsz))
+ if (copy_to_user((void __user *)arg, &info, outsz))
return -EFAULT;
return 0;
@@ -1148,7 +1246,7 @@ static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd,
return -EFAULT;
if ((info.argsz < minsz) ||
- (info.index >= mdev_state->dev_info.num_irqs))
+ (info.index >= VFIO_PCI_NUM_IRQS))
return -EINVAL;
ret = mbochs_get_irq_info(mdev, &info);