From 932c435caba8a2ce473a91753bad0173269ef334 Mon Sep 17 00:00:00 2001
From: Mark Rustad <mark.d.rustad@intel.com>
Date: Mon, 13 Jul 2015 11:40:02 -0700
Subject: PCI: Add dev_flags bit to access VPD through function 0

Add a dev_flags bit, PCI_DEV_FLAGS_VPD_REF_F0, to access VPD through
function 0 to provide VPD access on other functions.  This is for hardware
devices that provide copies of the same VPD capability registers in
multiple functions.  Because the kernel expects that each function has its
own registers, both the locking and the state tracking are affected by VPD
accesses to different functions.

On such devices for example, if a VPD write is performed on function 0,
*any* later attempt to read VPD from any other function of that device will
hang.  This has to do with how the kernel tracks the expected value of the
F bit per function.

Concurrent accesses to different functions of the same device can not only
hang but also corrupt both read and write VPD data.

When hangs occur, typically the error message:

  vpd r/w failed.  This is likely a firmware bug on this device.

will be seen.

Never set this bit on function 0 or there will be an infinite recursion.

Signed-off-by: Mark Rustad <mark.d.rustad@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Alexander Duyck <alexander.h.duyck@redhat.com>
CC: stable@vger.kernel.org
---
 include/linux/pci.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 8a0321a8fb59..8edb125db13a 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -180,6 +180,8 @@ enum pci_dev_flags {
 	PCI_DEV_FLAGS_NO_BUS_RESET = (__force pci_dev_flags_t) (1 << 6),
 	/* Do not use PM reset even if device advertises NoSoftRst- */
 	PCI_DEV_FLAGS_NO_PM_RESET = (__force pci_dev_flags_t) (1 << 7),
+	/* Get VPD from function 0 VPD */
+	PCI_DEV_FLAGS_VPD_REF_F0 = (__force pci_dev_flags_t) (1 << 8),
 };
 
 enum pci_irq_reroute_variant {
-- 
cgit v1.2.3


From 890e4847587fcff5eb0438e90992ad7d2a261f33 Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Wed, 10 Jun 2015 16:54:58 +0800
Subject: PCI: Add pcibios_alloc_irq() and pcibios_free_irq()

Add pcibios_alloc_irq() and pcibios_free_irq(), which are called when
binding/unbinding PCI device drivers.

PCI arch code may implement these to manage IRQ resources for hotplugged
devices.

[bhelgaas: changelog]
Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/pci/pci-driver.c | 26 ++++++++++++++++++++------
 include/linux/pci.h      |  2 ++
 2 files changed, 22 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 3cb2210de553..52a880ca1768 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -388,18 +388,31 @@ static int __pci_device_probe(struct pci_driver *drv, struct pci_dev *pci_dev)
 	return error;
 }
 
+int __weak pcibios_alloc_irq(struct pci_dev *dev)
+{
+	return 0;
+}
+
+void __weak pcibios_free_irq(struct pci_dev *dev)
+{
+}
+
 static int pci_device_probe(struct device *dev)
 {
-	int error = 0;
-	struct pci_driver *drv;
-	struct pci_dev *pci_dev;
+	int error;
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct pci_driver *drv = to_pci_driver(dev->driver);
+
+	error = pcibios_alloc_irq(pci_dev);
+	if (error < 0)
+		return error;
 
-	drv = to_pci_driver(dev->driver);
-	pci_dev = to_pci_dev(dev);
 	pci_dev_get(pci_dev);
 	error = __pci_device_probe(drv, pci_dev);
-	if (error)
+	if (error) {
+		pcibios_free_irq(pci_dev);
 		pci_dev_put(pci_dev);
+	}
 
 	return error;
 }
@@ -415,6 +428,7 @@ static int pci_device_remove(struct device *dev)
 			drv->remove(pci_dev);
 			pm_runtime_put_noidle(dev);
 		}
+		pcibios_free_irq(pci_dev);
 		pci_dev->driver = NULL;
 	}
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 8a0321a8fb59..b4832c92f23e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1645,6 +1645,8 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev,
 int pcibios_add_device(struct pci_dev *dev);
 void pcibios_release_device(struct pci_dev *dev);
 void pcibios_penalize_isa_irq(int irq, int active);
+int pcibios_alloc_irq(struct pci_dev *dev);
+void pcibios_free_irq(struct pci_dev *dev);
 
 #ifdef CONFIG_HIBERNATE_CALLBACKS
 extern struct dev_pm_ops pcibios_pm_ops;
-- 
cgit v1.2.3


From 811a4e6fce09bc9239c664c6a1a53645a678c303 Mon Sep 17 00:00:00 2001
From: Jiang Liu <jiang.liu@linux.intel.com>
Date: Wed, 10 Jun 2015 16:55:00 +0800
Subject: PCI: Add helpers to manage pci_dev->irq and pci_dev->irq_managed

Add pci_has_managed_irq(), pci_set_managed_irq(), and
pci_reset_managed_irq() to simplify code.  No functional change.

[bhelgaas: changelog]
Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/pci/intel_mid_pci.c |  4 ++--
 arch/x86/pci/irq.c           | 10 ++++------
 drivers/acpi/pci_irq.c       | 10 ++++------
 include/linux/pci.h          | 17 +++++++++++++++++
 4 files changed, 27 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index fb7a1f96d80c..22aaefb4f1ca 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -211,7 +211,7 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
 	struct irq_alloc_info info;
 	int polarity;
 
-	if (dev->irq_managed && dev->irq > 0)
+	if (pci_has_managed_irq(dev))
 		return 0;
 
 	if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER)
@@ -234,7 +234,7 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
 
 static void intel_mid_pci_irq_disable(struct pci_dev *dev)
 {
-	if (dev->irq_managed && dev->irq > 0) {
+	if (pci_has_managed_irq(dev)) {
 		mp_unmap_irq(dev->irq);
 		dev->irq_managed = 0;
 		/*
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 72108f0b66b1..32e70343e6fd 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -1202,7 +1202,7 @@ static int pirq_enable_irq(struct pci_dev *dev)
 			struct pci_dev *temp_dev;
 			int irq;
 
-			if (dev->irq_managed && dev->irq > 0)
+			if (pci_has_managed_irq(dev))
 				return 0;
 
 			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
@@ -1230,8 +1230,7 @@ static int pirq_enable_irq(struct pci_dev *dev)
 			}
 			dev = temp_dev;
 			if (irq >= 0) {
-				dev->irq_managed = 1;
-				dev->irq = irq;
+				pci_set_managed_irq(dev, irq);
 				dev_info(&dev->dev, "PCI->APIC IRQ transform: "
 					 "INT %c -> IRQ %d\n", 'A' + pin - 1, irq);
 				return 0;
@@ -1259,9 +1258,8 @@ static int pirq_enable_irq(struct pci_dev *dev)
 
 static void pirq_disable_irq(struct pci_dev *dev)
 {
-	if (io_apic_assign_pci_irqs && dev->irq_managed && dev->irq) {
+	if (io_apic_assign_pci_irqs && pci_has_managed_irq(dev)) {
 		mp_unmap_irq(dev->irq);
-		dev->irq = 0;
-		dev->irq_managed = 0;
+		pci_reset_managed_irq(dev);
 	}
 }
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index d1aad6900b4c..afa16c557c17 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -412,7 +412,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
 		return 0;
 	}
 
-	if (dev->irq_managed && dev->irq > 0)
+	if (pci_has_managed_irq(dev))
 		return 0;
 
 	entry = acpi_pci_irq_lookup(dev, pin);
@@ -457,8 +457,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
 		kfree(entry);
 		return rc;
 	}
-	dev->irq = rc;
-	dev->irq_managed = 1;
+	pci_set_managed_irq(dev, rc);
 
 	if (link)
 		snprintf(link_desc, sizeof(link_desc), " -> Link[%s]", link);
@@ -481,7 +480,7 @@ void acpi_pci_irq_disable(struct pci_dev *dev)
 	u8 pin;
 
 	pin = dev->pin;
-	if (!pin || !dev->irq_managed || dev->irq <= 0)
+	if (!pin || !pci_has_managed_irq(dev))
 		return;
 
 	entry = acpi_pci_irq_lookup(dev, pin);
@@ -503,7 +502,6 @@ void acpi_pci_irq_disable(struct pci_dev *dev)
 	dev_dbg(&dev->dev, "PCI INT %c disabled\n", pin_name(pin));
 	if (gsi >= 0) {
 		acpi_unregister_gsi(gsi);
-		dev->irq_managed = 0;
-		dev->irq = 0;
+		pci_reset_managed_irq(dev);
 	}
 }
diff --git a/include/linux/pci.h b/include/linux/pci.h
index b4832c92f23e..b7ab0c424ed6 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -963,6 +963,23 @@ static inline int pci_is_managed(struct pci_dev *pdev)
 	return pdev->is_managed;
 }
 
+static inline void pci_set_managed_irq(struct pci_dev *pdev, unsigned int irq)
+{
+	pdev->irq = irq;
+	pdev->irq_managed = 1;
+}
+
+static inline void pci_reset_managed_irq(struct pci_dev *pdev)
+{
+	pdev->irq = 0;
+	pdev->irq_managed = 0;
+}
+
+static inline bool pci_has_managed_irq(struct pci_dev *pdev)
+{
+	return pdev->irq_managed && pdev->irq > 0;
+}
+
 void pci_disable_device(struct pci_dev *dev);
 
 extern unsigned int pcibios_max_latency;
-- 
cgit v1.2.3


From 67546762978f523749eac157903e0b01c18e083a Mon Sep 17 00:00:00 2001
From: Yijing Wang <wangyijing@huawei.com>
Date: Fri, 17 Jul 2015 17:16:31 +0800
Subject: PCI: Protect pci_bus->slots with pci_slot_mutex, not pci_bus_sem

Rajat Jain reported a deadlock when PCIe hot-add and AER recovery happen at
the same time:

thread 1:

  pciehp_enable_slot
    pciehp_configure_device
      pci_bus_add_devices
        pci_bus_add_device
          device_attach
            device_lock(dev)                       # acquire device lock
            ...
            pciehp_probe
              init_slot
                pci_hp_register
                  pci_create_slot
                    down_write(pci_bus_sem)        # deadlock here

thread 2:

  aer_isr_one_error
    aer_process_err_device
      do_recovery
        broadcast_error_message(..., report_error_detected)
          pci_walk_bus(..., cb=report_error_detected, ...)
            down_read(&pci_bus_sem)                # acquire pci_bus_sem
            report_error_detected(dev)             # cb()
              device_lock(dev)                     # deadlock here

Previously, the bus->devices and bus->slots list were protected by
pci_bus_sem.  In pci_create_slot(), we held it for writing so we could
add to the bus->slots list.

Add a new local pci_slot_mutex to protect bus->slots.  Hold pci_bus_sem for
reading while searching the bus->devices list.

[bhelgaas: changelog]
Link: http://lkml.kernel.org/r/CAA93t1qpPqbih+UB0McA_d_+2rVaNkXsinAUxYzK9+JXSS+L-g@mail.gmail.com
Reported-by: Rajat Jain <rajatja@google.com>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Yijing Wang <wangyijing@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/slot.c  | 18 +++++++++++-------
 include/linux/pci.h |  3 ++-
 2 files changed, 13 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
index 396c200b9ddb..4bd3fce93fa4 100644
--- a/drivers/pci/slot.c
+++ b/drivers/pci/slot.c
@@ -14,6 +14,7 @@
 
 struct kset *pci_slots_kset;
 EXPORT_SYMBOL_GPL(pci_slots_kset);
+static DEFINE_MUTEX(pci_slot_mutex);
 
 static ssize_t pci_slot_attr_show(struct kobject *kobj,
 					struct attribute *attr, char *buf)
@@ -106,9 +107,11 @@ static void pci_slot_release(struct kobject *kobj)
 	dev_dbg(&slot->bus->dev, "dev %02x, released physical slot %s\n",
 		slot->number, pci_slot_name(slot));
 
+	down_read(&pci_bus_sem);
 	list_for_each_entry(dev, &slot->bus->devices, bus_list)
 		if (PCI_SLOT(dev->devfn) == slot->number)
 			dev->slot = NULL;
+	up_read(&pci_bus_sem);
 
 	list_del(&slot->list);
 
@@ -194,9 +197,8 @@ static int rename_slot(struct pci_slot *slot, const char *name)
 static struct pci_slot *get_slot(struct pci_bus *parent, int slot_nr)
 {
 	struct pci_slot *slot;
-	/*
-	 * We already hold pci_bus_sem so don't worry
-	 */
+
+	/* We already hold pci_slot_mutex */
 	list_for_each_entry(slot, &parent->slots, list)
 		if (slot->number == slot_nr) {
 			kobject_get(&slot->kobj);
@@ -253,7 +255,7 @@ struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
 	int err = 0;
 	char *slot_name = NULL;
 
-	down_write(&pci_bus_sem);
+	mutex_lock(&pci_slot_mutex);
 
 	if (slot_nr == -1)
 		goto placeholder;
@@ -301,16 +303,18 @@ placeholder:
 	INIT_LIST_HEAD(&slot->list);
 	list_add(&slot->list, &parent->slots);
 
+	down_read(&pci_bus_sem);
 	list_for_each_entry(dev, &parent->devices, bus_list)
 		if (PCI_SLOT(dev->devfn) == slot_nr)
 			dev->slot = slot;
+	up_read(&pci_bus_sem);
 
 	dev_dbg(&parent->dev, "dev %02x, created physical slot %s\n",
 		slot_nr, pci_slot_name(slot));
 
 out:
 	kfree(slot_name);
-	up_write(&pci_bus_sem);
+	mutex_unlock(&pci_slot_mutex);
 	return slot;
 err:
 	kfree(slot);
@@ -332,9 +336,9 @@ void pci_destroy_slot(struct pci_slot *slot)
 	dev_dbg(&slot->bus->dev, "dev %02x, dec refcount to %d\n",
 		slot->number, atomic_read(&slot->kobj.kref.refcount) - 1);
 
-	down_write(&pci_bus_sem);
+	mutex_lock(&pci_slot_mutex);
 	kobject_put(&slot->kobj);
-	up_write(&pci_bus_sem);
+	mutex_unlock(&pci_slot_mutex);
 }
 EXPORT_SYMBOL_GPL(pci_destroy_slot);
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 8a0321a8fb59..aaee493174e2 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -446,7 +446,8 @@ struct pci_bus {
 	struct list_head children;	/* list of child buses */
 	struct list_head devices;	/* list of devices on this bus */
 	struct pci_dev	*self;		/* bridge device as seen by parent */
-	struct list_head slots;		/* list of slots on this bus */
+	struct list_head slots;		/* list of slots on this bus;
+					   protected by pci_slot_mutex */
 	struct resource *resource[PCI_BRIDGE_RESOURCE_NUM];
 	struct list_head resources;	/* address space routed to this bus */
 	struct resource busn_res;	/* bus numbers routed to this bus */
-- 
cgit v1.2.3


From 017ffe64e8b8c8db0f50433a71da41c6a4e12710 Mon Sep 17 00:00:00 2001
From: Yijing Wang <wangyijing@huawei.com>
Date: Fri, 17 Jul 2015 17:16:32 +0800
Subject: PCI: Hold pci_slot_mutex while searching bus->slots list

Previously, pci_setup_device() and similar functions searched the
pci_bus->slots list without any locking.  It was possible for another
thread to update the list while we searched it.

Add pci_dev_assign_slot() to search the list while holding pci_slot_mutex.

[bhelgaas: changelog, fold in CONFIG_SYSFS fix]
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Yijing Wang <wangyijing@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 arch/powerpc/kernel/pci_of_scan.c |  6 +-----
 arch/sparc/kernel/pci.c           |  6 +-----
 drivers/pci/probe.c               |  6 +-----
 drivers/pci/slot.c                | 11 +++++++++++
 include/linux/pci.h               |  5 +++++
 5 files changed, 19 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 42e02a2d570b..5e2debfc6ce5 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -126,7 +126,6 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
 {
 	struct pci_dev *dev;
 	const char *type;
-	struct pci_slot *slot;
 
 	dev = pci_alloc_dev(bus);
 	if (!dev)
@@ -145,10 +144,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
 	dev->needs_freset = 0;		/* pcie fundamental reset required */
 	set_pcie_port_type(dev);
 
-	list_for_each_entry(slot, &dev->bus->slots, list)
-		if (PCI_SLOT(dev->devfn) == slot->number)
-			dev->slot = slot;
-
+	pci_dev_assign_slot(dev);
 	dev->vendor = get_int_prop(node, "vendor-id", 0xffff);
 	dev->device = get_int_prop(node, "device-id", 0xffff);
 	dev->subsystem_vendor = get_int_prop(node, "subsystem-vendor-id", 0);
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index c928bc64b4ba..3a0e1a986bfe 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -249,7 +249,6 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm,
 					 struct pci_bus *bus, int devfn)
 {
 	struct dev_archdata *sd;
-	struct pci_slot *slot;
 	struct platform_device *op;
 	struct pci_dev *dev;
 	const char *type;
@@ -290,10 +289,7 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm,
 	dev->multifunction = 0;		/* maybe a lie? */
 	set_pcie_port_type(dev);
 
-	list_for_each_entry(slot, &dev->bus->slots, list)
-		if (PCI_SLOT(dev->devfn) == slot->number)
-			dev->slot = slot;
-
+	pci_dev_assign_slot(dev);
 	dev->vendor = of_getintprop_default(node, "vendor-id", 0xffff);
 	dev->device = of_getintprop_default(node, "device-id", 0xffff);
 	dev->subsystem_vendor =
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index cefd636681b6..2a9ce16cb374 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1133,7 +1133,6 @@ int pci_setup_device(struct pci_dev *dev)
 {
 	u32 class;
 	u8 hdr_type;
-	struct pci_slot *slot;
 	int pos = 0;
 	struct pci_bus_region region;
 	struct resource *res;
@@ -1149,10 +1148,7 @@ int pci_setup_device(struct pci_dev *dev)
 	dev->error_state = pci_channel_io_normal;
 	set_pcie_port_type(dev);
 
-	list_for_each_entry(slot, &dev->bus->slots, list)
-		if (PCI_SLOT(dev->devfn) == slot->number)
-			dev->slot = slot;
-
+	pci_dev_assign_slot(dev);
 	/* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer)
 	   set this higher, assuming the system even supports it.  */
 	dev->dma_mask = 0xffffffff;
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
index 4bd3fce93fa4..429d34c348b9 100644
--- a/drivers/pci/slot.c
+++ b/drivers/pci/slot.c
@@ -194,6 +194,17 @@ static int rename_slot(struct pci_slot *slot, const char *name)
 	return result;
 }
 
+void pci_dev_assign_slot(struct pci_dev *dev)
+{
+	struct pci_slot *slot;
+
+	mutex_lock(&pci_slot_mutex);
+	list_for_each_entry(slot, &dev->bus->slots, list)
+		if (PCI_SLOT(dev->devfn) == slot->number)
+			dev->slot = slot;
+	mutex_unlock(&pci_slot_mutex);
+}
+
 static struct pci_slot *get_slot(struct pci_bus *parent, int slot_nr)
 {
 	struct pci_slot *slot;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index aaee493174e2..b3ba7fef2916 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -798,6 +798,11 @@ struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
 				 const char *name,
 				 struct hotplug_slot *hotplug);
 void pci_destroy_slot(struct pci_slot *slot);
+#ifdef CONFIG_SYSFS
+void pci_dev_assign_slot(struct pci_dev *dev);
+#else
+static inline void pci_dev_assign_slot(struct pci_dev *dev) { }
+#endif
 int pci_scan_slot(struct pci_bus *bus, int devfn);
 struct pci_dev *pci_scan_single_device(struct pci_bus *bus, int devfn);
 void pci_device_add(struct pci_dev *dev, struct pci_bus *bus);
-- 
cgit v1.2.3


From edc90fee916b4f0d14af9c6b5c08666747488ef8 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Fri, 17 Jul 2015 15:05:46 -0500
Subject: PCI: Allocate ATS struct during enumeration

Previously, we allocated pci_ats structures when an IOMMU driver called
pci_enable_ats().  An SR-IOV VF shares the STU setting with its PF, so when
enabling ATS on the VF, we allocated a pci_ats struct for the PF if it
didn't already have one.  We held the sriov->lock to serialize threads
concurrently enabling ATS on several VFS so only one would allocate the PF
pci_ats.

Gregor reported a deadlock here:

  pci_enable_sriov
    sriov_enable
      virtfn_add
        mutex_lock(dev->sriov->lock)      # acquire sriov->lock
        pci_device_add
          device_add
            BUS_NOTIFY_ADD_DEVICE notifier chain
            iommu_bus_notifier
              amd_iommu_add_device        # iommu_ops.add_device
                init_iommu_group
                  iommu_group_get_for_dev
                    iommu_group_add_device
                      __iommu_attach_device
                        amd_iommu_attach_device  # iommu_ops.attach_device
                          attach_device
                            pci_enable_ats
                              mutex_lock(dev->sriov->lock) # deadlock

There's no reason to delay allocating the pci_ats struct, and if we
allocate it for each device at enumeration-time, there's no need for
locking in pci_enable_ats().

Allocate pci_ats struct during enumeration, when we initialize other
capabilities.

Note that this implementation requires ATS to be enabled on the PF first,
before on any of the VFs because the PF controls the STU for all the VFs.

Link: http://permalink.gmane.org/gmane.linux.kernel.iommu/9433
Reported-by: Gregor Dick <gdick@solarflare.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/pci/ats.c       | 98 +++++++++++++++++++++----------------------------
 drivers/pci/probe.c     |  3 ++
 drivers/pci/remove.c    |  1 +
 include/linux/pci-ats.h |  2 +-
 include/linux/pci.h     |  9 +++++
 5 files changed, 56 insertions(+), 57 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index a8099d4d0c9d..2026f5388796 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -17,7 +17,7 @@
 
 #include "pci.h"
 
-static int ats_alloc_one(struct pci_dev *dev, int ps)
+static void ats_alloc_one(struct pci_dev *dev)
 {
 	int pos;
 	u16 cap;
@@ -25,20 +25,19 @@ static int ats_alloc_one(struct pci_dev *dev, int ps)
 
 	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ATS);
 	if (!pos)
-		return -ENODEV;
+		return;
 
 	ats = kzalloc(sizeof(*ats), GFP_KERNEL);
-	if (!ats)
-		return -ENOMEM;
+	if (!ats) {
+		dev_warn(&dev->dev, "can't allocate space for ATS state\n");
+		return;
+	}
 
 	ats->pos = pos;
-	ats->stu = ps;
 	pci_read_config_word(dev, pos + PCI_ATS_CAP, &cap);
 	ats->qdep = PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) :
 					    PCI_ATS_MAX_QDEP;
 	dev->ats = ats;
-
-	return 0;
 }
 
 static void ats_free_one(struct pci_dev *dev)
@@ -47,6 +46,16 @@ static void ats_free_one(struct pci_dev *dev)
 	dev->ats = NULL;
 }
 
+void pci_ats_init(struct pci_dev *dev)
+{
+	ats_alloc_one(dev);
+}
+
+void pci_ats_free(struct pci_dev *dev)
+{
+	ats_free_one(dev);
+}
+
 /**
  * pci_enable_ats - enable the ATS capability
  * @dev: the PCI device
@@ -56,43 +65,35 @@ static void ats_free_one(struct pci_dev *dev)
  */
 int pci_enable_ats(struct pci_dev *dev, int ps)
 {
-	int rc;
 	u16 ctrl;
 
 	BUG_ON(dev->ats && dev->ats->is_enabled);
 
+	if (!dev->ats)
+		return -EINVAL;
+
 	if (ps < PCI_ATS_MIN_STU)
 		return -EINVAL;
 
-	if (dev->is_physfn || dev->is_virtfn) {
-		struct pci_dev *pdev = dev->is_physfn ? dev : dev->physfn;
+	/*
+	 * Note that enabling ATS on a VF fails unless it's already enabled
+	 * with the same STU on the PF.
+	 */
+	ctrl = PCI_ATS_CTRL_ENABLE;
+	if (dev->is_virtfn) {
+		struct pci_dev *pdev = dev->physfn;
 
-		mutex_lock(&pdev->sriov->lock);
-		if (pdev->ats)
-			rc = pdev->ats->stu == ps ? 0 : -EINVAL;
-		else
-			rc = ats_alloc_one(pdev, ps);
+		if (pdev->ats->stu != ps)
+			return -EINVAL;
 
-		if (!rc)
-			pdev->ats->ref_cnt++;
-		mutex_unlock(&pdev->sriov->lock);
-		if (rc)
-			return rc;
-	}
-
-	if (!dev->is_physfn) {
-		rc = ats_alloc_one(dev, ps);
-		if (rc)
-			return rc;
+		atomic_inc(&pdev->ats->ref_cnt);  /* count enabled VFs */
+	} else {
+		dev->ats->stu = ps;
+		ctrl |= PCI_ATS_CTRL_STU(dev->ats->stu - PCI_ATS_MIN_STU);
 	}
-
-	ctrl = PCI_ATS_CTRL_ENABLE;
-	if (!dev->is_virtfn)
-		ctrl |= PCI_ATS_CTRL_STU(ps - PCI_ATS_MIN_STU);
 	pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl);
 
 	dev->ats->is_enabled = 1;
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(pci_enable_ats);
@@ -107,24 +108,20 @@ void pci_disable_ats(struct pci_dev *dev)
 
 	BUG_ON(!dev->ats || !dev->ats->is_enabled);
 
+	if (atomic_read(&dev->ats->ref_cnt))
+		return;		/* VFs still enabled */
+
+	if (dev->is_virtfn) {
+		struct pci_dev *pdev = dev->physfn;
+
+		atomic_dec(&pdev->ats->ref_cnt);
+	}
+
 	pci_read_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, &ctrl);
 	ctrl &= ~PCI_ATS_CTRL_ENABLE;
 	pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl);
 
 	dev->ats->is_enabled = 0;
-
-	if (dev->is_physfn || dev->is_virtfn) {
-		struct pci_dev *pdev = dev->is_physfn ? dev : dev->physfn;
-
-		mutex_lock(&pdev->sriov->lock);
-		pdev->ats->ref_cnt--;
-		if (!pdev->ats->ref_cnt)
-			ats_free_one(pdev);
-		mutex_unlock(&pdev->sriov->lock);
-	}
-
-	if (!dev->is_physfn)
-		ats_free_one(dev);
 }
 EXPORT_SYMBOL_GPL(pci_disable_ats);
 
@@ -140,7 +137,6 @@ void pci_restore_ats_state(struct pci_dev *dev)
 	ctrl = PCI_ATS_CTRL_ENABLE;
 	if (!dev->is_virtfn)
 		ctrl |= PCI_ATS_CTRL_STU(dev->ats->stu - PCI_ATS_MIN_STU);
-
 	pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl);
 }
 EXPORT_SYMBOL_GPL(pci_restore_ats_state);
@@ -159,23 +155,13 @@ EXPORT_SYMBOL_GPL(pci_restore_ats_state);
  */
 int pci_ats_queue_depth(struct pci_dev *dev)
 {
-	int pos;
-	u16 cap;
-
 	if (dev->is_virtfn)
 		return 0;
 
 	if (dev->ats)
 		return dev->ats->qdep;
 
-	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ATS);
-	if (!pos)
-		return -ENODEV;
-
-	pci_read_config_word(dev, pos + PCI_ATS_CAP, &cap);
-
-	return PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) :
-				       PCI_ATS_MAX_QDEP;
+	return -ENODEV;
 }
 EXPORT_SYMBOL_GPL(pci_ats_queue_depth);
 
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index cefd636681b6..c206398ca67e 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1540,6 +1540,9 @@ static void pci_init_capabilities(struct pci_dev *dev)
 	/* Single Root I/O Virtualization */
 	pci_iov_init(dev);
 
+	/* Address Translation Services */
+	pci_ats_init(dev);
+
 	/* Enable ACS P2P upstream forwarding */
 	pci_enable_acs(dev);
 }
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 8a280e9c2ad1..27617b862ca8 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -26,6 +26,7 @@ static void pci_stop_dev(struct pci_dev *dev)
 		dev->is_added = 0;
 	}
 
+	pci_ats_free(dev);
 	if (dev->bus->self)
 		pcie_aspm_exit_link_state(dev);
 }
diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index 72031785fe1d..e2dcc2ff3d0e 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -8,7 +8,7 @@ struct pci_ats {
 	int pos;        /* capability position */
 	int stu;        /* Smallest Translation Unit */
 	int qdep;       /* Invalidate Queue Depth */
-	int ref_cnt;    /* Physical Function reference count */
+	atomic_t ref_cnt; /* number of VFs with ATS enabled */
 	unsigned int is_enabled:1;      /* Enable bit is set */
 };
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 8a0321a8fb59..1817819ba57b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1294,6 +1294,15 @@ int  ht_create_irq(struct pci_dev *dev, int idx);
 void ht_destroy_irq(unsigned int irq);
 #endif /* CONFIG_HT_IRQ */
 
+#ifdef CONFIG_PCI_ATS
+/* Address Translation Service */
+void pci_ats_init(struct pci_dev *dev);
+void pci_ats_free(struct pci_dev *dev);
+#else
+static inline void pci_ats_init(struct pci_dev *dev) { }
+static inline void pci_ats_free(struct pci_dev *dev) { }
+#endif
+
 void pci_cfg_access_lock(struct pci_dev *dev);
 bool pci_cfg_access_trylock(struct pci_dev *dev);
 void pci_cfg_access_unlock(struct pci_dev *dev);
-- 
cgit v1.2.3


From d544d75ac96aa1b0a8a378826626a0fbd8ce4380 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Fri, 17 Jul 2015 15:15:19 -0500
Subject: PCI: Embed ATS info directly into struct pci_dev

The pci_ats struct is small and will get smaller, so I don't think it's
worth allocating it separately from the pci_dev struct.

Embed the ATS fields directly into struct pci_dev.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/pci/ats.c       | 61 +++++++++++++++++--------------------------------
 drivers/pci/remove.c    |  1 -
 include/linux/pci-ats.h | 10 +-------
 include/linux/pci.h     |  8 ++++---
 4 files changed, 27 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index 2026f5388796..690ae6e6786c 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -21,29 +21,15 @@ static void ats_alloc_one(struct pci_dev *dev)
 {
 	int pos;
 	u16 cap;
-	struct pci_ats *ats;
 
 	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ATS);
 	if (!pos)
 		return;
 
-	ats = kzalloc(sizeof(*ats), GFP_KERNEL);
-	if (!ats) {
-		dev_warn(&dev->dev, "can't allocate space for ATS state\n");
-		return;
-	}
-
-	ats->pos = pos;
-	pci_read_config_word(dev, pos + PCI_ATS_CAP, &cap);
-	ats->qdep = PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) :
+	dev->ats_cap = pos;
+	pci_read_config_word(dev, dev->ats_cap + PCI_ATS_CAP, &cap);
+	dev->ats_qdep = PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) :
 					    PCI_ATS_MAX_QDEP;
-	dev->ats = ats;
-}
-
-static void ats_free_one(struct pci_dev *dev)
-{
-	kfree(dev->ats);
-	dev->ats = NULL;
 }
 
 void pci_ats_init(struct pci_dev *dev)
@@ -51,11 +37,6 @@ void pci_ats_init(struct pci_dev *dev)
 	ats_alloc_one(dev);
 }
 
-void pci_ats_free(struct pci_dev *dev)
-{
-	ats_free_one(dev);
-}
-
 /**
  * pci_enable_ats - enable the ATS capability
  * @dev: the PCI device
@@ -67,9 +48,9 @@ int pci_enable_ats(struct pci_dev *dev, int ps)
 {
 	u16 ctrl;
 
-	BUG_ON(dev->ats && dev->ats->is_enabled);
+	BUG_ON(dev->ats_cap && dev->ats_enabled);
 
-	if (!dev->ats)
+	if (!dev->ats_cap)
 		return -EINVAL;
 
 	if (ps < PCI_ATS_MIN_STU)
@@ -83,17 +64,17 @@ int pci_enable_ats(struct pci_dev *dev, int ps)
 	if (dev->is_virtfn) {
 		struct pci_dev *pdev = dev->physfn;
 
-		if (pdev->ats->stu != ps)
+		if (pdev->ats_stu != ps)
 			return -EINVAL;
 
-		atomic_inc(&pdev->ats->ref_cnt);  /* count enabled VFs */
+		atomic_inc(&pdev->ats_ref_cnt);  /* count enabled VFs */
 	} else {
-		dev->ats->stu = ps;
-		ctrl |= PCI_ATS_CTRL_STU(dev->ats->stu - PCI_ATS_MIN_STU);
+		dev->ats_stu = ps;
+		ctrl |= PCI_ATS_CTRL_STU(dev->ats_stu - PCI_ATS_MIN_STU);
 	}
-	pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl);
+	pci_write_config_word(dev, dev->ats_cap + PCI_ATS_CTRL, ctrl);
 
-	dev->ats->is_enabled = 1;
+	dev->ats_enabled = 1;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(pci_enable_ats);
@@ -106,22 +87,22 @@ void pci_disable_ats(struct pci_dev *dev)
 {
 	u16 ctrl;
 
-	BUG_ON(!dev->ats || !dev->ats->is_enabled);
+	BUG_ON(!dev->ats_cap || !dev->ats_enabled);
 
-	if (atomic_read(&dev->ats->ref_cnt))
+	if (atomic_read(&dev->ats_ref_cnt))
 		return;		/* VFs still enabled */
 
 	if (dev->is_virtfn) {
 		struct pci_dev *pdev = dev->physfn;
 
-		atomic_dec(&pdev->ats->ref_cnt);
+		atomic_dec(&pdev->ats_ref_cnt);
 	}
 
-	pci_read_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, &ctrl);
+	pci_read_config_word(dev, dev->ats_cap + PCI_ATS_CTRL, &ctrl);
 	ctrl &= ~PCI_ATS_CTRL_ENABLE;
-	pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl);
+	pci_write_config_word(dev, dev->ats_cap + PCI_ATS_CTRL, ctrl);
 
-	dev->ats->is_enabled = 0;
+	dev->ats_enabled = 0;
 }
 EXPORT_SYMBOL_GPL(pci_disable_ats);
 
@@ -136,8 +117,8 @@ void pci_restore_ats_state(struct pci_dev *dev)
 
 	ctrl = PCI_ATS_CTRL_ENABLE;
 	if (!dev->is_virtfn)
-		ctrl |= PCI_ATS_CTRL_STU(dev->ats->stu - PCI_ATS_MIN_STU);
-	pci_write_config_word(dev, dev->ats->pos + PCI_ATS_CTRL, ctrl);
+		ctrl |= PCI_ATS_CTRL_STU(dev->ats_stu - PCI_ATS_MIN_STU);
+	pci_write_config_word(dev, dev->ats_cap + PCI_ATS_CTRL, ctrl);
 }
 EXPORT_SYMBOL_GPL(pci_restore_ats_state);
 
@@ -158,8 +139,8 @@ int pci_ats_queue_depth(struct pci_dev *dev)
 	if (dev->is_virtfn)
 		return 0;
 
-	if (dev->ats)
-		return dev->ats->qdep;
+	if (dev->ats_cap)
+		return dev->ats_qdep;
 
 	return -ENODEV;
 }
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 27617b862ca8..8a280e9c2ad1 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -26,7 +26,6 @@ static void pci_stop_dev(struct pci_dev *dev)
 		dev->is_added = 0;
 	}
 
-	pci_ats_free(dev);
 	if (dev->bus->self)
 		pcie_aspm_exit_link_state(dev);
 }
diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index e2dcc2ff3d0e..5d81d47b0a95 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -4,14 +4,6 @@
 #include <linux/pci.h>
 
 /* Address Translation Service */
-struct pci_ats {
-	int pos;        /* capability position */
-	int stu;        /* Smallest Translation Unit */
-	int qdep;       /* Invalidate Queue Depth */
-	atomic_t ref_cnt; /* number of VFs with ATS enabled */
-	unsigned int is_enabled:1;      /* Enable bit is set */
-};
-
 #ifdef CONFIG_PCI_ATS
 
 int pci_enable_ats(struct pci_dev *dev, int ps);
@@ -26,7 +18,7 @@ int pci_ats_queue_depth(struct pci_dev *dev);
  */
 static inline int pci_ats_enabled(struct pci_dev *dev)
 {
-	return dev->ats && dev->ats->is_enabled;
+	return dev->ats_cap && dev->ats_enabled;
 }
 
 #else /* CONFIG_PCI_ATS */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 1817819ba57b..8bc16b5e4747 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -343,6 +343,7 @@ struct pci_dev {
 	unsigned int	msi_enabled:1;
 	unsigned int	msix_enabled:1;
 	unsigned int	ari_enabled:1;	/* ARI forwarding */
+	unsigned int	ats_enabled:1;	/* Address Translation Service */
 	unsigned int	is_managed:1;
 	unsigned int    needs_freset:1; /* Dev requires fundamental reset */
 	unsigned int	state_saved:1;
@@ -375,7 +376,10 @@ struct pci_dev {
 		struct pci_sriov *sriov;	/* SR-IOV capability related */
 		struct pci_dev *physfn;	/* the PF this VF is associated with */
 	};
-	struct pci_ats	*ats;	/* Address Translation Service */
+	int		ats_cap;	/* ATS Capability offset */
+	int		ats_stu;	/* ATS Smallest Translation Unit */
+	int		ats_qdep;	/* ATS Invalidate Queue Depth */
+	atomic_t	ats_ref_cnt;	/* number of VFs with ATS enabled */
 #endif
 	phys_addr_t rom; /* Physical address of ROM if it's not from the BAR */
 	size_t romlen; /* Length of ROM if it's not from the BAR */
@@ -1297,10 +1301,8 @@ void ht_destroy_irq(unsigned int irq);
 #ifdef CONFIG_PCI_ATS
 /* Address Translation Service */
 void pci_ats_init(struct pci_dev *dev);
-void pci_ats_free(struct pci_dev *dev);
 #else
 static inline void pci_ats_init(struct pci_dev *dev) { }
-static inline void pci_ats_free(struct pci_dev *dev) { }
 #endif
 
 void pci_cfg_access_lock(struct pci_dev *dev);
-- 
cgit v1.2.3


From 67930995d7fb8ae7d2078822b563010b289ace2e Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Fri, 17 Jul 2015 15:27:34 -0500
Subject: PCI: Reduce size of ATS structure elements

The extended capabilities list is linked with 12-bit pointers, and the ATS
Smallest Translation Unit and Invalidate Queue Depth fields are both 5
bits.

Use u16 and u8 to hold the extended capability address and the stu and qdep
values.  No functional change.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/pci.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 8bc16b5e4747..238b77e8ca41 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -376,9 +376,9 @@ struct pci_dev {
 		struct pci_sriov *sriov;	/* SR-IOV capability related */
 		struct pci_dev *physfn;	/* the PF this VF is associated with */
 	};
-	int		ats_cap;	/* ATS Capability offset */
-	int		ats_stu;	/* ATS Smallest Translation Unit */
-	int		ats_qdep;	/* ATS Invalidate Queue Depth */
+	u16		ats_cap;	/* ATS Capability offset */
+	u8		ats_stu;	/* ATS Smallest Translation Unit */
+	u8		ats_qdep;	/* ATS Invalidate Queue Depth */
 	atomic_t	ats_ref_cnt;	/* number of VFs with ATS enabled */
 #endif
 	phys_addr_t rom; /* Physical address of ROM if it's not from the BAR */
-- 
cgit v1.2.3


From ff9bee895c4d11a519a6b2c49451376025a6af4e Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Fri, 17 Jul 2015 15:55:48 -0500
Subject: PCI: Move ATS declarations to linux/pci.h so they're all together

Move ATS declarations to linux/pci.h so they're all in one place.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/pci-ats.h | 41 -----------------------------------------
 include/linux/pci.h     | 10 +++++++++-
 2 files changed, 9 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index 5d81d47b0a95..57e0b8250947 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -3,47 +3,6 @@
 
 #include <linux/pci.h>
 
-/* Address Translation Service */
-#ifdef CONFIG_PCI_ATS
-
-int pci_enable_ats(struct pci_dev *dev, int ps);
-void pci_disable_ats(struct pci_dev *dev);
-int pci_ats_queue_depth(struct pci_dev *dev);
-
-/**
- * pci_ats_enabled - query the ATS status
- * @dev: the PCI device
- *
- * Returns 1 if ATS capability is enabled, or 0 if not.
- */
-static inline int pci_ats_enabled(struct pci_dev *dev)
-{
-	return dev->ats_cap && dev->ats_enabled;
-}
-
-#else /* CONFIG_PCI_ATS */
-
-static inline int pci_enable_ats(struct pci_dev *dev, int ps)
-{
-	return -ENODEV;
-}
-
-static inline void pci_disable_ats(struct pci_dev *dev)
-{
-}
-
-static inline int pci_ats_queue_depth(struct pci_dev *dev)
-{
-	return -ENODEV;
-}
-
-static inline int pci_ats_enabled(struct pci_dev *dev)
-{
-	return 0;
-}
-
-#endif /* CONFIG_PCI_ATS */
-
 #ifdef CONFIG_PCI_PRI
 
 int pci_enable_pri(struct pci_dev *pdev, u32 reqs);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 238b77e8ca41..307f96a58e8b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1301,8 +1301,16 @@ void ht_destroy_irq(unsigned int irq);
 #ifdef CONFIG_PCI_ATS
 /* Address Translation Service */
 void pci_ats_init(struct pci_dev *dev);
+int pci_enable_ats(struct pci_dev *dev, int ps);
+void pci_disable_ats(struct pci_dev *dev);
+int pci_ats_queue_depth(struct pci_dev *dev);
+static inline int pci_ats_enabled(struct pci_dev *dev) { return dev->ats_cap && dev->ats_enabled; }
 #else
-static inline void pci_ats_init(struct pci_dev *dev) { }
+static inline void pci_ats_init(struct pci_dev *d) { }
+static inline int pci_enable_ats(struct pci_dev *d, int ps) { return -ENODEV; }
+static inline void pci_disable_ats(struct pci_dev *d) { }
+static inline int pci_ats_queue_depth(struct pci_dev *d) { return -ENODEV; }
+static inline int pci_ats_enabled(struct pci_dev *d) { return 0; }
 #endif
 
 void pci_cfg_access_lock(struct pci_dev *dev);
-- 
cgit v1.2.3


From a71f938f3a9a7bc879296cd34ecae9effe5edf3f Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Mon, 20 Jul 2015 09:24:32 -0500
Subject: PCI: Stop caching ATS Invalidate Queue Depth

Stop caching the Invalidate Queue Depth in struct pci_dev.
pci_ats_queue_depth() is typically called only once per device, and it
returns a fixed value per-device, so callers who need the value frequently
can cache it themselves.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/pci/ats.c   | 9 ++++-----
 include/linux/pci.h | 1 -
 2 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index 9355f754c7c2..ceda7dc556d4 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -20,16 +20,12 @@
 void pci_ats_init(struct pci_dev *dev)
 {
 	int pos;
-	u16 cap;
 
 	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ATS);
 	if (!pos)
 		return;
 
 	dev->ats_cap = pos;
-	pci_read_config_word(dev, dev->ats_cap + PCI_ATS_CAP, &cap);
-	dev->ats_qdep = PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) :
-					    PCI_ATS_MAX_QDEP;
 }
 
 /**
@@ -131,13 +127,16 @@ EXPORT_SYMBOL_GPL(pci_restore_ats_state);
  */
 int pci_ats_queue_depth(struct pci_dev *dev)
 {
+	u16 cap;
+
 	if (!dev->ats_cap)
 		return -EINVAL;
 
 	if (dev->is_virtfn)
 		return 0;
 
-	return dev->ats_qdep;
+	pci_read_config_word(dev, dev->ats_cap + PCI_ATS_CAP, &cap);
+	return PCI_ATS_CAP_QDEP(cap) ? PCI_ATS_CAP_QDEP(cap) : PCI_ATS_MAX_QDEP;
 }
 EXPORT_SYMBOL_GPL(pci_ats_queue_depth);
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 307f96a58e8b..4b484fdfd66f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -378,7 +378,6 @@ struct pci_dev {
 	};
 	u16		ats_cap;	/* ATS Capability offset */
 	u8		ats_stu;	/* ATS Smallest Translation Unit */
-	u8		ats_qdep;	/* ATS Invalidate Queue Depth */
 	atomic_t	ats_ref_cnt;	/* number of VFs with ATS enabled */
 #endif
 	phys_addr_t rom; /* Physical address of ROM if it's not from the BAR */
-- 
cgit v1.2.3


From f7ef1340bb501717372a39f4807d0ad519ebd432 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Mon, 20 Jul 2015 09:23:37 -0500
Subject: PCI: Remove pci_ats_enabled()

Remove pci_ats_enabled().  There are no callers outside the ATS code
itself.  We don't need to check ats_cap, because if we don't find an ATS
capability, we'll never set ats_enabled.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/pci/ats.c   | 6 +++---
 include/linux/pci.h | 2 --
 2 files changed, 3 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index ceda7dc556d4..eeb9fb2b47aa 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -43,7 +43,7 @@ int pci_enable_ats(struct pci_dev *dev, int ps)
 	if (!dev->ats_cap)
 		return -EINVAL;
 
-	if (WARN_ON(pci_ats_enabled(dev)))
+	if (WARN_ON(dev->ats_enabled))
 		return -EBUSY;
 
 	if (ps < PCI_ATS_MIN_STU)
@@ -80,7 +80,7 @@ void pci_disable_ats(struct pci_dev *dev)
 	struct pci_dev *pdev;
 	u16 ctrl;
 
-	if (WARN_ON(!pci_ats_enabled(dev)))
+	if (WARN_ON(!dev->ats_enabled))
 		return;
 
 	if (atomic_read(&dev->ats_ref_cnt))
@@ -103,7 +103,7 @@ void pci_restore_ats_state(struct pci_dev *dev)
 {
 	u16 ctrl;
 
-	if (!pci_ats_enabled(dev))
+	if (!dev->ats_enabled)
 		return;
 
 	ctrl = PCI_ATS_CTRL_ENABLE;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 4b484fdfd66f..806da7634f91 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1303,13 +1303,11 @@ void pci_ats_init(struct pci_dev *dev);
 int pci_enable_ats(struct pci_dev *dev, int ps);
 void pci_disable_ats(struct pci_dev *dev);
 int pci_ats_queue_depth(struct pci_dev *dev);
-static inline int pci_ats_enabled(struct pci_dev *dev) { return dev->ats_cap && dev->ats_enabled; }
 #else
 static inline void pci_ats_init(struct pci_dev *d) { }
 static inline int pci_enable_ats(struct pci_dev *d, int ps) { return -ENODEV; }
 static inline void pci_disable_ats(struct pci_dev *d) { }
 static inline int pci_ats_queue_depth(struct pci_dev *d) { return -ENODEV; }
-static inline int pci_ats_enabled(struct pci_dev *d) { return 0; }
 #endif
 
 void pci_cfg_access_lock(struct pci_dev *dev);
-- 
cgit v1.2.3


From d2a7926d42b3b46e45b4e44dc3302b2701ec0856 Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Mon, 3 Aug 2015 21:27:10 -0500
Subject: PCI: Add pci_scan_root_bus_msi()

Add a pci_scan_root_bus_msi() interface so an arch can specify the MSI
controller up front.  This removes the need for a pcibios callback to set
the MSI controller later.

This is not exported because I'd like to replace the variety of "scan root
bus" interfaces with a single, more extensible interface that can handle
the MSI controller, domain, pci_ops, resources, etc.  I hope this interface
is temporary.

[bhelgaas: changelog, split into separate patch]
Suggested-by: Russell King <linux@arm.linux.org.uk>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Jingoo Han <jingoohan1@gmail.com>
---
 drivers/pci/probe.c | 14 ++++++++++++--
 include/linux/pci.h |  4 ++++
 2 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index cefd636681b6..9ff4df0db0c3 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2096,8 +2096,9 @@ void pci_bus_release_busn_res(struct pci_bus *b)
 			res, ret ? "can not be" : "is");
 }
 
-struct pci_bus *pci_scan_root_bus(struct device *parent, int bus,
-		struct pci_ops *ops, void *sysdata, struct list_head *resources)
+struct pci_bus *pci_scan_root_bus_msi(struct device *parent, int bus,
+		struct pci_ops *ops, void *sysdata,
+		struct list_head *resources, struct msi_controller *msi)
 {
 	struct resource_entry *window;
 	bool found = false;
@@ -2114,6 +2115,8 @@ struct pci_bus *pci_scan_root_bus(struct device *parent, int bus,
 	if (!b)
 		return NULL;
 
+	b->msi = msi;
+
 	if (!found) {
 		dev_info(&b->dev,
 		 "No busn resource found for root bus, will use [bus %02x-ff]\n",
@@ -2128,6 +2131,13 @@ struct pci_bus *pci_scan_root_bus(struct device *parent, int bus,
 
 	return b;
 }
+
+struct pci_bus *pci_scan_root_bus(struct device *parent, int bus,
+		struct pci_ops *ops, void *sysdata, struct list_head *resources)
+{
+	return pci_scan_root_bus_msi(parent, bus, ops, sysdata, resources,
+				     NULL);
+}
 EXPORT_SYMBOL(pci_scan_root_bus);
 
 struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops,
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 8a0321a8fb59..4d4f9d29fcc9 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -787,6 +787,10 @@ struct pci_bus *pci_create_root_bus(struct device *parent, int bus,
 int pci_bus_insert_busn_res(struct pci_bus *b, int bus, int busmax);
 int pci_bus_update_busn_res_end(struct pci_bus *b, int busmax);
 void pci_bus_release_busn_res(struct pci_bus *b);
+struct pci_bus *pci_scan_root_bus_msi(struct device *parent, int bus,
+				      struct pci_ops *ops, void *sysdata,
+				      struct list_head *resources,
+				      struct msi_controller *msi);
 struct pci_bus *pci_scan_root_bus(struct device *parent, int bus,
 					     struct pci_ops *ops, void *sysdata,
 					     struct list_head *resources);
-- 
cgit v1.2.3


From 27d868b5e6cfaee4fec66b388e4085ff94050fa7 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Mon, 24 Aug 2015 08:48:16 -0500
Subject: PCI: Set MPS to match upstream bridge

Firmware typically configures the PCIe fabric with a consistent Max Payload
Size setting based on the devices present at boot.  A hot-added device
typically has the power-on default MPS setting (128 bytes), which may not
match the fabric.

The previous Linux default, in the absence of any "pci=pcie_bus_*" options,
was PCIE_BUS_TUNE_OFF, in which we never touch MPS, even for hot-added
devices.

Add a new default setting, PCIE_BUS_DEFAULT, in which we make sure every
device's MPS setting matches the upstream bridge.  This makes it more
likely that a hot-added device will work in a system with optimized MPS
configuration.

Note that if we hot-add a device that only supports 128-byte MPS, it still
likely won't work because we don't reconfigure the rest of the fabric.
Booting with "pci=pcie_bus_peer2peer" is a workaround for this because it
sets MPS to 128 for everything.

[bhelgaas: changelog, new default, rework for pci_configure_device() path]
Tested-by: Keith Busch <keith.busch@intel.com>
Tested-by: Jordan Hargrave <jharg93@gmail.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Yinghai Lu <yinghai@kernel.org>
---
 drivers/pci/pci.c    |  2 +-
 drivers/pci/probe.c  | 22 ++++++++++++++++++++--
 drivers/pci/quirks.c |  3 ++-
 include/linux/pci.h  |  9 +++++----
 4 files changed, 28 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 0008c950452c..b96b4ccc2819 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -81,7 +81,7 @@ unsigned long pci_cardbus_mem_size = DEFAULT_CARDBUS_MEM_SIZE;
 unsigned long pci_hotplug_io_size  = DEFAULT_HOTPLUG_IO_SIZE;
 unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE;
 
-enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_TUNE_OFF;
+enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_DEFAULT;
 
 /*
  * The default CLS is used if arch didn't set CLS explicitly and not
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index eb32395d5897..eebabe3a814e 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1278,7 +1278,7 @@ int pci_setup_device(struct pci_dev *dev)
 static void pci_configure_mps(struct pci_dev *dev)
 {
 	struct pci_dev *bridge = pci_upstream_bridge(dev);
-	int mps, p_mps;
+	int mps, p_mps, rc;
 
 	if (!pci_is_pcie(dev) || !bridge || !pci_is_pcie(bridge))
 		return;
@@ -1294,6 +1294,23 @@ static void pci_configure_mps(struct pci_dev *dev)
 			 mps, pci_name(bridge), p_mps);
 		return;
 	}
+
+	/*
+	 * Fancier MPS configuration is done later by
+	 * pcie_bus_configure_settings()
+	 */
+	if (pcie_bus_config != PCIE_BUS_DEFAULT)
+		return;
+
+	rc = pcie_set_mps(dev, p_mps);
+	if (rc) {
+		dev_warn(&dev->dev, "can't set Max Payload Size to %d; if necessary, use \"pci=pcie_bus_safe\" and report a bug\n",
+			 p_mps);
+		return;
+	}
+
+	dev_info(&dev->dev, "Max Payload Size set to %d (was %d, max %d)\n",
+		 p_mps, mps, 128 << dev->pcie_mpss);
 }
 
 static struct hpp_type0 pci_default_type0 = {
@@ -1821,7 +1838,8 @@ static int pcie_bus_configure_set(struct pci_dev *dev, void *data)
 	if (!pci_is_pcie(dev))
 		return 0;
 
-	if (pcie_bus_config == PCIE_BUS_TUNE_OFF)
+	if (pcie_bus_config == PCIE_BUS_TUNE_OFF ||
+	    pcie_bus_config == PCIE_BUS_DEFAULT)
 		return 0;
 
 	mps = 128 << *(u8 *)data;
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index e9fd0e90fa3b..55f2c208bd0e 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -2862,7 +2862,8 @@ static void quirk_intel_mc_errata(struct pci_dev *dev)
 	int err;
 	u16 rcc;
 
-	if (pcie_bus_config == PCIE_BUS_TUNE_OFF)
+	if (pcie_bus_config == PCIE_BUS_TUNE_OFF ||
+	    pcie_bus_config == PCIE_BUS_DEFAULT)
 		return;
 
 	/* Intel errata specifies bits to change but does not say what they are.
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 4d4f9d29fcc9..0007942923a7 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -738,10 +738,11 @@ struct pci_driver {
 void pcie_bus_configure_settings(struct pci_bus *bus);
 
 enum pcie_bus_config_types {
-	PCIE_BUS_TUNE_OFF,
-	PCIE_BUS_SAFE,
-	PCIE_BUS_PERFORMANCE,
-	PCIE_BUS_PEER2PEER,
+	PCIE_BUS_TUNE_OFF,	/* don't touch MPS at all */
+	PCIE_BUS_DEFAULT,	/* ensure MPS matches upstream bridge */
+	PCIE_BUS_SAFE,		/* use largest MPS boot-time devices support */
+	PCIE_BUS_PERFORMANCE,	/* use MPS and MRRS for best performance */
+	PCIE_BUS_PEER2PEER,	/* set MPS = 128 for all devices */
 };
 
 extern enum pcie_bus_config_types pcie_bus_config;
-- 
cgit v1.2.3