summaryrefslogtreecommitdiff
path: root/drivers/pci
diff options
context:
space:
mode:
authorBjorn Helgaas <bhelgaas@google.com>2019-09-23 16:10:12 -0500
committerBjorn Helgaas <bhelgaas@google.com>2019-09-23 16:10:12 -0500
commit63fa8437cbae701c163f692bce6ad6e847910e49 (patch)
treef72faaf618857ad33128c1c3dc3e580f450227d2 /drivers/pci
parent3ddbff3676bea1fbd6c1b70e12416e17aaf91a66 (diff)
parent27235b15e4197b3a1014cf0ca0b08dbbf61a692b (diff)
downloadlwn-63fa8437cbae701c163f692bce6ad6e847910e49.tar.gz
lwn-63fa8437cbae701c163f692bce6ad6e847910e49.zip
Merge branch 'pci/p2pdma'
- Move P2PCMA PCI bus offset from generic dev_pagemap to pci_p2pdma_pagemap (Logan Gunthorpe) - Add provider's pci_dev to pci_p2pdma_pagemap (Logan Gunthorpe) - Apply host bridge whitelist for ACS (Logan Gunthorpe) - Whitelist some Intel host bridges for P2PDMA (Logan Gunthorpe) - Add attrs to pci_p2pdma_map_sg() to match dma_map_sg() (Logan Gunthorpe) - Add pci_p2pdma_unmap_sg() (Logan Gunthorpe) - Store P2PDMA mapping method in xarray (Logan Gunthorpe) - Map requests that traverse a host bridge (Logan Gunthorpe) - Allow IOMMU for host bridge whitelist (Logan Gunthorpe) * pci/p2pdma: PCI/P2PDMA: Update pci_p2pdma_distance_many() documentation PCI/P2PDMA: Allow IOMMU for host bridge whitelist PCI/P2PDMA: dma_map() requests that traverse the host bridge PCI/P2PDMA: Store mapping method in an xarray PCI/P2PDMA: Factor out __pci_p2pdma_map_sg() PCI/P2PDMA: Introduce pci_p2pdma_unmap_sg() PCI/P2PDMA: Add attrs argument to pci_p2pdma_map_sg() PCI/P2PDMA: Whitelist some Intel host bridges PCI/P2PDMA: Factor out host_bridge_whitelist() PCI/P2PDMA: Apply host bridge whitelist for ACS PCI/P2PDMA: Factor out __upstream_bridge_distance() PCI/P2PDMA: Add constants for map type results to upstream_bridge_distance() PCI/P2PDMA: Add provider's pci_dev to pci_p2pdma_pagemap struct PCI/P2PDMA: Introduce private pagemap structure
Diffstat (limited to 'drivers/pci')
-rw-r--r--drivers/pci/p2pdma.c374
1 files changed, 266 insertions, 108 deletions
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 234476226529..0608aae72ccc 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -18,13 +18,32 @@
#include <linux/percpu-refcount.h>
#include <linux/random.h>
#include <linux/seq_buf.h>
-#include <linux/iommu.h>
+#include <linux/xarray.h>
+
+enum pci_p2pdma_map_type {
+ PCI_P2PDMA_MAP_UNKNOWN = 0,
+ PCI_P2PDMA_MAP_NOT_SUPPORTED,
+ PCI_P2PDMA_MAP_BUS_ADDR,
+ PCI_P2PDMA_MAP_THRU_HOST_BRIDGE,
+};
struct pci_p2pdma {
struct gen_pool *pool;
bool p2pmem_published;
+ struct xarray map_types;
};
+struct pci_p2pdma_pagemap {
+ struct dev_pagemap pgmap;
+ struct pci_dev *provider;
+ u64 bus_offset;
+};
+
+static struct pci_p2pdma_pagemap *to_p2p_pgmap(struct dev_pagemap *pgmap)
+{
+ return container_of(pgmap, struct pci_p2pdma_pagemap, pgmap);
+}
+
static ssize_t size_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
@@ -87,6 +106,7 @@ static void pci_p2pdma_release(void *data)
gen_pool_destroy(p2pdma->pool);
sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group);
+ xa_destroy(&p2pdma->map_types);
}
static int pci_p2pdma_setup(struct pci_dev *pdev)
@@ -98,6 +118,8 @@ static int pci_p2pdma_setup(struct pci_dev *pdev)
if (!p2p)
return -ENOMEM;
+ xa_init(&p2p->map_types);
+
p2p->pool = gen_pool_create(PAGE_SHIFT, dev_to_node(&pdev->dev));
if (!p2p->pool)
goto out;
@@ -135,6 +157,7 @@ out:
int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
u64 offset)
{
+ struct pci_p2pdma_pagemap *p2p_pgmap;
struct dev_pagemap *pgmap;
void *addr;
int error;
@@ -157,14 +180,18 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
return error;
}
- pgmap = devm_kzalloc(&pdev->dev, sizeof(*pgmap), GFP_KERNEL);
- if (!pgmap)
+ p2p_pgmap = devm_kzalloc(&pdev->dev, sizeof(*p2p_pgmap), GFP_KERNEL);
+ if (!p2p_pgmap)
return -ENOMEM;
+
+ pgmap = &p2p_pgmap->pgmap;
pgmap->res.start = pci_resource_start(pdev, bar) + offset;
pgmap->res.end = pgmap->res.start + size - 1;
pgmap->res.flags = pci_resource_flags(pdev, bar);
pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
- pgmap->pci_p2pdma_bus_offset = pci_bus_address(pdev, bar) -
+
+ p2p_pgmap->provider = pdev;
+ p2p_pgmap->bus_offset = pci_bus_address(pdev, bar) -
pci_resource_start(pdev, bar);
addr = devm_memremap_pages(&pdev->dev, pgmap);
@@ -246,19 +273,32 @@ static void seq_buf_print_bus_devfn(struct seq_buf *buf, struct pci_dev *pdev)
seq_buf_printf(buf, "%s;", pci_name(pdev));
}
-/*
- * If we can't find a common upstream bridge take a look at the root
- * complex and compare it to a whitelist of known good hardware.
- */
-static bool root_complex_whitelist(struct pci_dev *dev)
+static const struct pci_p2pdma_whitelist_entry {
+ unsigned short vendor;
+ unsigned short device;
+ enum {
+ REQ_SAME_HOST_BRIDGE = 1 << 0,
+ } flags;
+} pci_p2pdma_whitelist[] = {
+ /* AMD ZEN */
+ {PCI_VENDOR_ID_AMD, 0x1450, 0},
+
+ /* Intel Xeon E5/Core i7 */
+ {PCI_VENDOR_ID_INTEL, 0x3c00, REQ_SAME_HOST_BRIDGE},
+ {PCI_VENDOR_ID_INTEL, 0x3c01, REQ_SAME_HOST_BRIDGE},
+ /* Intel Xeon E7 v3/Xeon E5 v3/Core i7 */
+ {PCI_VENDOR_ID_INTEL, 0x2f00, REQ_SAME_HOST_BRIDGE},
+ {PCI_VENDOR_ID_INTEL, 0x2f01, REQ_SAME_HOST_BRIDGE},
+ {}
+};
+
+static bool __host_bridge_whitelist(struct pci_host_bridge *host,
+ bool same_host_bridge)
{
- struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
struct pci_dev *root = pci_get_slot(host->bus, PCI_DEVFN(0, 0));
+ const struct pci_p2pdma_whitelist_entry *entry;
unsigned short vendor, device;
- if (iommu_present(dev->dev.bus))
- return false;
-
if (!root)
return false;
@@ -266,65 +306,49 @@ static bool root_complex_whitelist(struct pci_dev *dev)
device = root->device;
pci_dev_put(root);
- /* AMD ZEN host bridges can do peer to peer */
- if (vendor == PCI_VENDOR_ID_AMD && device == 0x1450)
+ for (entry = pci_p2pdma_whitelist; entry->vendor; entry++) {
+ if (vendor != entry->vendor || device != entry->device)
+ continue;
+ if (entry->flags & REQ_SAME_HOST_BRIDGE && !same_host_bridge)
+ return false;
+
return true;
+ }
return false;
}
/*
- * Find the distance through the nearest common upstream bridge between
- * two PCI devices.
- *
- * If the two devices are the same device then 0 will be returned.
- *
- * If there are two virtual functions of the same device behind the same
- * bridge port then 2 will be returned (one step down to the PCIe switch,
- * then one step back to the same device).
- *
- * In the case where two devices are connected to the same PCIe switch, the
- * value 4 will be returned. This corresponds to the following PCI tree:
- *
- * -+ Root Port
- * \+ Switch Upstream Port
- * +-+ Switch Downstream Port
- * + \- Device A
- * \-+ Switch Downstream Port
- * \- Device B
- *
- * The distance is 4 because we traverse from Device A through the downstream
- * port of the switch, to the common upstream port, back up to the second
- * downstream port and then to Device B.
- *
- * Any two devices that don't have a common upstream bridge will return -1.
- * In this way devices on separate PCIe root ports will be rejected, which
- * is what we want for peer-to-peer seeing each PCIe root port defines a
- * separate hierarchy domain and there's no way to determine whether the root
- * complex supports forwarding between them.
- *
- * In the case where two devices are connected to different PCIe switches,
- * this function will still return a positive distance as long as both
- * switches eventually have a common upstream bridge. Note this covers
- * the case of using multiple PCIe switches to achieve a desired level of
- * fan-out from a root port. The exact distance will be a function of the
- * number of switches between Device A and Device B.
- *
- * If a bridge which has any ACS redirection bits set is in the path
- * then this functions will return -2. This is so we reject any
- * cases where the TLPs are forwarded up into the root complex.
- * In this case, a list of all infringing bridge addresses will be
- * populated in acs_list (assuming it's non-null) for printk purposes.
+ * If we can't find a common upstream bridge take a look at the root
+ * complex and compare it to a whitelist of known good hardware.
*/
-static int upstream_bridge_distance(struct pci_dev *provider,
- struct pci_dev *client,
- struct seq_buf *acs_list)
+static bool host_bridge_whitelist(struct pci_dev *a, struct pci_dev *b)
+{
+ struct pci_host_bridge *host_a = pci_find_host_bridge(a->bus);
+ struct pci_host_bridge *host_b = pci_find_host_bridge(b->bus);
+
+ if (host_a == host_b)
+ return __host_bridge_whitelist(host_a, true);
+
+ if (__host_bridge_whitelist(host_a, false) &&
+ __host_bridge_whitelist(host_b, false))
+ return true;
+
+ return false;
+}
+
+static enum pci_p2pdma_map_type
+__upstream_bridge_distance(struct pci_dev *provider, struct pci_dev *client,
+ int *dist, bool *acs_redirects, struct seq_buf *acs_list)
{
struct pci_dev *a = provider, *b = client, *bb;
int dist_a = 0;
int dist_b = 0;
int acs_cnt = 0;
+ if (acs_redirects)
+ *acs_redirects = false;
+
/*
* Note, we don't need to take references to devices returned by
* pci_upstream_bridge() seeing we hold a reference to a child
@@ -353,15 +377,10 @@ static int upstream_bridge_distance(struct pci_dev *provider,
dist_a++;
}
- /*
- * Allow the connection if both devices are on a whitelisted root
- * complex, but add an arbitrary large value to the distance.
- */
- if (root_complex_whitelist(provider) &&
- root_complex_whitelist(client))
- return 0x1000 + dist_a + dist_b;
+ if (dist)
+ *dist = dist_a + dist_b;
- return -1;
+ return PCI_P2PDMA_MAP_THRU_HOST_BRIDGE;
check_b_path_acs:
bb = b;
@@ -378,33 +397,110 @@ check_b_path_acs:
bb = pci_upstream_bridge(bb);
}
- if (acs_cnt)
- return -2;
+ if (dist)
+ *dist = dist_a + dist_b;
+
+ if (acs_cnt) {
+ if (acs_redirects)
+ *acs_redirects = true;
+
+ return PCI_P2PDMA_MAP_THRU_HOST_BRIDGE;
+ }
+
+ return PCI_P2PDMA_MAP_BUS_ADDR;
+}
+
+static unsigned long map_types_idx(struct pci_dev *client)
+{
+ return (pci_domain_nr(client->bus) << 16) |
+ (client->bus->number << 8) | client->devfn;
+}
+
+/*
+ * Find the distance through the nearest common upstream bridge between
+ * two PCI devices.
+ *
+ * If the two devices are the same device then 0 will be returned.
+ *
+ * If there are two virtual functions of the same device behind the same
+ * bridge port then 2 will be returned (one step down to the PCIe switch,
+ * then one step back to the same device).
+ *
+ * In the case where two devices are connected to the same PCIe switch, the
+ * value 4 will be returned. This corresponds to the following PCI tree:
+ *
+ * -+ Root Port
+ * \+ Switch Upstream Port
+ * +-+ Switch Downstream Port
+ * + \- Device A
+ * \-+ Switch Downstream Port
+ * \- Device B
+ *
+ * The distance is 4 because we traverse from Device A through the downstream
+ * port of the switch, to the common upstream port, back up to the second
+ * downstream port and then to Device B.
+ *
+ * Any two devices that cannot communicate using p2pdma will return
+ * PCI_P2PDMA_MAP_NOT_SUPPORTED.
+ *
+ * Any two devices that have a data path that goes through the host bridge
+ * will consult a whitelist. If the host bridges are on the whitelist,
+ * this function will return PCI_P2PDMA_MAP_THRU_HOST_BRIDGE.
+ *
+ * If either bridge is not on the whitelist this function returns
+ * PCI_P2PDMA_MAP_NOT_SUPPORTED.
+ *
+ * If a bridge which has any ACS redirection bits set is in the path,
+ * acs_redirects will be set to true. In this case, a list of all infringing
+ * bridge addresses will be populated in acs_list (assuming it's non-null)
+ * for printk purposes.
+ */
+static enum pci_p2pdma_map_type
+upstream_bridge_distance(struct pci_dev *provider, struct pci_dev *client,
+ int *dist, bool *acs_redirects, struct seq_buf *acs_list)
+{
+ enum pci_p2pdma_map_type map_type;
+
+ map_type = __upstream_bridge_distance(provider, client, dist,
+ acs_redirects, acs_list);
+
+ if (map_type == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE) {
+ if (!host_bridge_whitelist(provider, client))
+ map_type = PCI_P2PDMA_MAP_NOT_SUPPORTED;
+ }
+
+ if (provider->p2pdma)
+ xa_store(&provider->p2pdma->map_types, map_types_idx(client),
+ xa_mk_value(map_type), GFP_KERNEL);
- return dist_a + dist_b;
+ return map_type;
}
-static int upstream_bridge_distance_warn(struct pci_dev *provider,
- struct pci_dev *client)
+static enum pci_p2pdma_map_type
+upstream_bridge_distance_warn(struct pci_dev *provider, struct pci_dev *client,
+ int *dist)
{
struct seq_buf acs_list;
+ bool acs_redirects;
int ret;
seq_buf_init(&acs_list, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE);
if (!acs_list.buffer)
return -ENOMEM;
- ret = upstream_bridge_distance(provider, client, &acs_list);
- if (ret == -2) {
- pci_warn(client, "cannot be used for peer-to-peer DMA as ACS redirect is set between the client and provider (%s)\n",
+ ret = upstream_bridge_distance(provider, client, dist, &acs_redirects,
+ &acs_list);
+ if (acs_redirects) {
+ pci_warn(client, "ACS redirect is set between the client and provider (%s)\n",
pci_name(provider));
/* Drop final semicolon */
acs_list.buffer[acs_list.len-1] = 0;
pci_warn(client, "to disable ACS redirect for this path, add the kernel parameter: pci=disable_acs_redir=%s\n",
acs_list.buffer);
+ }
- } else if (ret < 0) {
- pci_warn(client, "cannot be used for peer-to-peer DMA as the client and provider (%s) do not share an upstream bridge\n",
+ if (ret == PCI_P2PDMA_MAP_NOT_SUPPORTED) {
+ pci_warn(client, "cannot be used for peer-to-peer DMA as the client and provider (%s) do not share an upstream bridge or whitelisted host bridge\n",
pci_name(provider));
}
@@ -421,22 +517,22 @@ static int upstream_bridge_distance_warn(struct pci_dev *provider,
* @num_clients: number of clients in the array
* @verbose: if true, print warnings for devices when we return -1
*
- * Returns -1 if any of the clients are not compatible (behind the same
- * root port as the provider), otherwise returns a positive number where
- * a lower number is the preferable choice. (If there's one client
- * that's the same as the provider it will return 0, which is best choice).
+ * Returns -1 if any of the clients are not compatible, otherwise returns a
+ * positive number where a lower number is the preferable choice. (If there's
+ * one client that's the same as the provider it will return 0, which is best
+ * choice).
*
- * For now, "compatible" means the provider and the clients are all behind
- * the same PCI root port. This cuts out cases that may work but is safest
- * for the user. Future work can expand this to white-list root complexes that
- * can safely forward between each ports.
+ * "compatible" means the provider and the clients are either all behind
+ * the same PCI root port or the host bridges connected to each of the devices
+ * are listed in the 'pci_p2pdma_whitelist'.
*/
int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients,
int num_clients, bool verbose)
{
bool not_supported = false;
struct pci_dev *pci_client;
- int distance = 0;
+ int total_dist = 0;
+ int distance;
int i, ret;
if (num_clients == 0)
@@ -461,26 +557,26 @@ int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients,
if (verbose)
ret = upstream_bridge_distance_warn(provider,
- pci_client);
+ pci_client, &distance);
else
ret = upstream_bridge_distance(provider, pci_client,
- NULL);
+ &distance, NULL, NULL);
pci_dev_put(pci_client);
- if (ret < 0)
+ if (ret == PCI_P2PDMA_MAP_NOT_SUPPORTED)
not_supported = true;
if (not_supported && !verbose)
break;
- distance += ret;
+ total_dist += distance;
}
if (not_supported)
return -1;
- return distance;
+ return total_dist;
}
EXPORT_SYMBOL_GPL(pci_p2pdma_distance_many);
@@ -706,21 +802,19 @@ void pci_p2pmem_publish(struct pci_dev *pdev, bool publish)
}
EXPORT_SYMBOL_GPL(pci_p2pmem_publish);
-/**
- * pci_p2pdma_map_sg - map a PCI peer-to-peer scatterlist for DMA
- * @dev: device doing the DMA request
- * @sg: scatter list to map
- * @nents: elements in the scatterlist
- * @dir: DMA direction
- *
- * Scatterlists mapped with this function should not be unmapped in any way.
- *
- * Returns the number of SG entries mapped or 0 on error.
- */
-int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
- enum dma_data_direction dir)
+static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct pci_dev *provider,
+ struct pci_dev *client)
+{
+ if (!provider->p2pdma)
+ return PCI_P2PDMA_MAP_NOT_SUPPORTED;
+
+ return xa_to_value(xa_load(&provider->p2pdma->map_types,
+ map_types_idx(client)));
+}
+
+static int __pci_p2pdma_map_sg(struct pci_p2pdma_pagemap *p2p_pgmap,
+ struct device *dev, struct scatterlist *sg, int nents)
{
- struct dev_pagemap *pgmap;
struct scatterlist *s;
phys_addr_t paddr;
int i;
@@ -736,16 +830,80 @@ int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
return 0;
for_each_sg(sg, s, nents, i) {
- pgmap = sg_page(s)->pgmap;
paddr = sg_phys(s);
- s->dma_address = paddr - pgmap->pci_p2pdma_bus_offset;
+ s->dma_address = paddr - p2p_pgmap->bus_offset;
sg_dma_len(s) = s->length;
}
return nents;
}
-EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg);
+
+/**
+ * pci_p2pdma_map_sg - map a PCI peer-to-peer scatterlist for DMA
+ * @dev: device doing the DMA request
+ * @sg: scatter list to map
+ * @nents: elements in the scatterlist
+ * @dir: DMA direction
+ * @attrs: DMA attributes passed to dma_map_sg() (if called)
+ *
+ * Scatterlists mapped with this function should be unmapped using
+ * pci_p2pdma_unmap_sg_attrs().
+ *
+ * Returns the number of SG entries mapped or 0 on error.
+ */
+int pci_p2pdma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+ struct pci_p2pdma_pagemap *p2p_pgmap =
+ to_p2p_pgmap(sg_page(sg)->pgmap);
+ struct pci_dev *client;
+
+ if (WARN_ON_ONCE(!dev_is_pci(dev)))
+ return 0;
+
+ client = to_pci_dev(dev);
+
+ switch (pci_p2pdma_map_type(p2p_pgmap->provider, client)) {
+ case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
+ return dma_map_sg_attrs(dev, sg, nents, dir, attrs);
+ case PCI_P2PDMA_MAP_BUS_ADDR:
+ return __pci_p2pdma_map_sg(p2p_pgmap, dev, sg, nents);
+ default:
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg_attrs);
+
+/**
+ * pci_p2pdma_unmap_sg - unmap a PCI peer-to-peer scatterlist that was
+ * mapped with pci_p2pdma_map_sg()
+ * @dev: device doing the DMA request
+ * @sg: scatter list to map
+ * @nents: number of elements returned by pci_p2pdma_map_sg()
+ * @dir: DMA direction
+ * @attrs: DMA attributes passed to dma_unmap_sg() (if called)
+ */
+void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+ struct pci_p2pdma_pagemap *p2p_pgmap =
+ to_p2p_pgmap(sg_page(sg)->pgmap);
+ enum pci_p2pdma_map_type map_type;
+ struct pci_dev *client;
+
+ if (WARN_ON_ONCE(!dev_is_pci(dev)))
+ return;
+
+ client = to_pci_dev(dev);
+
+ map_type = pci_p2pdma_map_type(p2p_pgmap->provider, client);
+
+ if (map_type == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
+ dma_unmap_sg_attrs(dev, sg, nents, dir, attrs);
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_unmap_sg_attrs);
/**
* pci_p2pdma_enable_store - parse a configfs/sysfs attribute store