From b03e7495a862b028294f59fc87286d6d78ee7fa1 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Wed, 20 Jul 2011 15:20:54 -0500 Subject: PCI: Set PCI-E Max Payload Size on fabric On a given PCI-E fabric, each device, bridge, and root port can have a different PCI-E maximum payload size. There is a sizable performance boost for having the largest possible maximum payload size on each PCI-E device. However, if improperly configured, fatal bus errors can occur. Thus, it is important to ensure that PCI-E payloads sends by a device are never larger than the MPS setting of all devices on the way to the destination. This can be achieved two ways: - A conservative approach is to use the smallest common denominator of the entire tree below a root complex for every device on that fabric. This means for example that having a 128 bytes MPS USB controller on one leg of a switch will dramatically reduce performances of a video card or 10GE adapter on another leg of that same switch. It also means that any hierarchy supporting hotplug slots (including expresscard or thunderbolt I suppose, dbl check that) will have to be entirely clamped to 128 bytes since we cannot predict what will be plugged into those slots, and we cannot change the MPS on a "live" system. - A more optimal way is possible, if it falls within a couple of constraints: * The top-level host bridge will never generate packets larger than the smallest TLP (or if it can be controlled independently from its MPS at least) * The device will never generate packets larger than MPS (which can be configured via MRRS) * No support of direct PCI-E <-> PCI-E transfers between devices without some additional code to specifically deal with that case Then we can use an approach that basically ignores downstream requests and focuses exclusively on upstream requests. In that case, all we need to care about is that a device MPS is no larger than its parent MPS, which allows us to keep all switches/bridges to the max MPS supported by their parent and eventually the PHB. In this case, your USB controller would no longer "starve" your 10GE Ethernet and your hotplug slots won't affect your global MPS. Additionally, the hotplugged devices themselves can be configured to a larger MPS up to the value configured in the hotplug bridge. To choose between the two available options, two PCI kernel boot args have been added to the PCI calls. "pcie_bus_safe" will provide the former behavior, while "pcie_bus_perf" will perform the latter behavior. By default, the latter behavior is used. NOTE: due to the location of the enablement, each arch will need to add calls to this function. This patch only enables x86. This patch includes a number of changes recommended by Benjamin Herrenschmidt. Tested-by: Jordan_Hargrave@dell.com Signed-off-by: Jon Mason Signed-off-by: Jesse Barnes --- include/linux/pci.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index f27893b3b724..1ff9bbafd932 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -251,7 +251,8 @@ struct pci_dev { u8 revision; /* PCI revision, low byte of class word */ u8 hdr_type; /* PCI header type (`multi' flag masked out) */ u8 pcie_cap; /* PCI-E capability offset */ - u8 pcie_type; /* PCI-E device/port type */ + u8 pcie_type:4; /* PCI-E device/port type */ + u8 pcie_mpss:3; /* PCI-E Max Payload Size Supported */ u8 rom_base_reg; /* which config register controls the ROM */ u8 pin; /* which interrupt pin this device uses */ @@ -617,6 +618,16 @@ struct pci_driver { /* these external functions are only available when PCI support is enabled */ #ifdef CONFIG_PCI +extern void pcie_bus_configure_settings(struct pci_bus *bus, u8 smpss); + +enum pcie_bus_config_types { + PCIE_BUS_PERFORMANCE, + PCIE_BUS_SAFE, + PCIE_BUS_PEER2PEER, +}; + +extern enum pcie_bus_config_types pcie_bus_config; + extern struct bus_type pci_bus_type; /* Do NOT directly access these two variables, unless you are arch specific pci @@ -796,6 +807,8 @@ int pcix_get_mmrbc(struct pci_dev *dev); int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc); int pcie_get_readrq(struct pci_dev *dev); int pcie_set_readrq(struct pci_dev *dev, int rq); +int pcie_get_mps(struct pci_dev *dev); +int pcie_set_mps(struct pci_dev *dev, int mps); int __pci_reset_function(struct pci_dev *dev); int pci_reset_function(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, int resno); -- cgit v1.2.3 From 2bbc6942273b5b3097bd265d82227bdd84b351b2 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Mon, 25 Jul 2011 13:08:39 -0700 Subject: PCI : ability to relocate assigned pci-resources Currently pci-bridges are allocated enough resources to satisfy their immediate requirements. Any additional resource-requests fail if additional free space, contiguous to the one already allocated, is not available. This behavior is not reasonable since sufficient contiguous resources, that can satisfy the request, are available at a different location. This patch provides the ability to expand and relocate a allocated resource. v2: Changelog: Fixed size calculation in pci_reassign_resource() v3: Changelog : Split this patch. The resource.c changes are already upstream. All the pci driver changes are in here. Signed-off-by: Ram Pai Signed-off-by: Jesse Barnes --- drivers/pci/setup-bus.c | 27 +++++---- drivers/pci/setup-res.c | 152 ++++++++++++++++++++++++++++++------------------ include/linux/pci.h | 1 + 3 files changed, 113 insertions(+), 67 deletions(-) (limited to 'include') diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 4409cd0e15fa..1796c6ffe91c 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -34,6 +34,7 @@ struct resource_list_x { resource_size_t start; resource_size_t end; resource_size_t add_size; + resource_size_t min_align; unsigned long flags; }; @@ -65,7 +66,7 @@ void pci_realloc(void) */ static void add_to_list(struct resource_list_x *head, struct pci_dev *dev, struct resource *res, - resource_size_t add_size) + resource_size_t add_size, resource_size_t min_align) { struct resource_list_x *list = head; struct resource_list_x *ln = list->next; @@ -84,13 +85,16 @@ static void add_to_list(struct resource_list_x *head, tmp->end = res->end; tmp->flags = res->flags; tmp->add_size = add_size; + tmp->min_align = min_align; list->next = tmp; } static void add_to_failed_list(struct resource_list_x *head, struct pci_dev *dev, struct resource *res) { - add_to_list(head, dev, res, 0); + add_to_list(head, dev, res, + 0 /* dont care */, + 0 /* dont care */); } static void __dev_sort_resources(struct pci_dev *dev, @@ -159,13 +163,16 @@ static void adjust_resources_sorted(struct resource_list_x *add_head, idx = res - &list->dev->resource[0]; add_size=list->add_size; - if (!resource_size(res) && add_size) { - res->end = res->start + add_size - 1; - if(pci_assign_resource(list->dev, idx)) + if (!resource_size(res)) { + res->end = res->start + add_size - 1; + if(pci_assign_resource(list->dev, idx)) reset_resource(res); - } else if (add_size) { - adjust_resource(res, res->start, - resource_size(res) + add_size); + } else { + resource_size_t align = list->min_align; + res->flags |= list->flags & (IORESOURCE_STARTALIGN|IORESOURCE_SIZEALIGN); + if (pci_reassign_resource(list->dev, idx, add_size, align)) + dev_printk(KERN_DEBUG, &list->dev->dev, "failed to add optional resources res=%pR\n", + res); } out: tmp = list; @@ -619,7 +626,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, b_res->end = b_res->start + size0 - 1; b_res->flags |= IORESOURCE_STARTALIGN; if (size1 > size0 && add_head) - add_to_list(add_head, bus->self, b_res, size1-size0); + add_to_list(add_head, bus->self, b_res, size1-size0, 4096); } /** @@ -722,7 +729,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, b_res->end = size0 + min_align - 1; b_res->flags |= IORESOURCE_STARTALIGN | mem64_mask; if (size1 > size0 && add_head) - add_to_list(add_head, bus->self, b_res, size1-size0); + add_to_list(add_head, bus->self, b_res, size1-size0, min_align); return 1; } diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 319f359906e8..51a9095c7da4 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -128,16 +128,16 @@ void pci_disable_bridge_window(struct pci_dev *dev) } #endif /* CONFIG_PCI_QUIRKS */ + + static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev, - int resno) + int resno, resource_size_t size, resource_size_t align) { struct resource *res = dev->resource + resno; - resource_size_t size, min, align; + resource_size_t min; int ret; - size = resource_size(res); min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM; - align = pci_resource_alignment(dev, res); /* First, try exact prefetching match.. */ ret = pci_bus_alloc_resource(bus, res, size, align, min, @@ -154,56 +154,101 @@ static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev, ret = pci_bus_alloc_resource(bus, res, size, align, min, 0, pcibios_align_resource, dev); } + return ret; +} - if (ret < 0 && dev->fw_addr[resno]) { - struct resource *root, *conflict; - resource_size_t start, end; +static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev, + int resno, resource_size_t size) +{ + struct resource *root, *conflict; + resource_size_t start, end; + int ret = 0; - /* - * If we failed to assign anything, let's try the address - * where firmware left it. That at least has a chance of - * working, which is better than just leaving it disabled. - */ + if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + else + root = &iomem_resource; + + start = res->start; + end = res->end; + res->start = dev->fw_addr[resno]; + res->end = res->start + size - 1; + dev_info(&dev->dev, "BAR %d: trying firmware assignment %pR\n", + resno, res); + conflict = request_resource_conflict(root, res); + if (conflict) { + dev_info(&dev->dev, + "BAR %d: %pR conflicts with %s %pR\n", resno, + res, conflict->name, conflict); + res->start = start; + res->end = end; + ret = 1; + } + return ret; +} + +static int _pci_assign_resource(struct pci_dev *dev, int resno, int size, resource_size_t min_align) +{ + struct resource *res = dev->resource + resno; + struct pci_bus *bus; + int ret; + char *type; - if (res->flags & IORESOURCE_IO) - root = &ioport_resource; + bus = dev->bus; + while ((ret = __pci_assign_resource(bus, dev, resno, size, min_align))) { + if (!bus->parent || !bus->self->transparent) + break; + bus = bus->parent; + } + + if (ret) { + if (res->flags & IORESOURCE_MEM) + if (res->flags & IORESOURCE_PREFETCH) + type = "mem pref"; + else + type = "mem"; + else if (res->flags & IORESOURCE_IO) + type = "io"; else - root = &iomem_resource; - - start = res->start; - end = res->end; - res->start = dev->fw_addr[resno]; - res->end = res->start + size - 1; - dev_info(&dev->dev, "BAR %d: trying firmware assignment %pR\n", - resno, res); - conflict = request_resource_conflict(root, res); - if (conflict) { - dev_info(&dev->dev, - "BAR %d: %pR conflicts with %s %pR\n", resno, - res, conflict->name, conflict); - res->start = start; - res->end = end; - } else - ret = 0; + type = "unknown"; + dev_info(&dev->dev, + "BAR %d: can't assign %s (size %#llx)\n", + resno, type, (unsigned long long) resource_size(res)); } + return ret; +} + +int pci_reassign_resource(struct pci_dev *dev, int resno, resource_size_t addsize, + resource_size_t min_align) +{ + struct resource *res = dev->resource + resno; + resource_size_t new_size; + int ret; + + if (!res->parent) { + dev_info(&dev->dev, "BAR %d: can't reassign an unassigned resouce %pR " + "\n", resno, res); + return -EINVAL; + } + + new_size = resource_size(res) + addsize + min_align; + ret = _pci_assign_resource(dev, resno, new_size, min_align); if (!ret) { res->flags &= ~IORESOURCE_STARTALIGN; dev_info(&dev->dev, "BAR %d: assigned %pR\n", resno, res); if (resno < PCI_BRIDGE_RESOURCES) pci_update_resource(dev, resno); } - return ret; } int pci_assign_resource(struct pci_dev *dev, int resno) { struct resource *res = dev->resource + resno; - resource_size_t align; + resource_size_t align, size; struct pci_bus *bus; int ret; - char *type; align = pci_resource_alignment(dev, res); if (!align) { @@ -213,34 +258,27 @@ int pci_assign_resource(struct pci_dev *dev, int resno) } bus = dev->bus; - while ((ret = __pci_assign_resource(bus, dev, resno))) { - if (bus->parent && bus->self->transparent) - bus = bus->parent; - else - bus = NULL; - if (bus) - continue; - break; - } + size = resource_size(res); + ret = _pci_assign_resource(dev, resno, size, align); - if (ret) { - if (res->flags & IORESOURCE_MEM) - if (res->flags & IORESOURCE_PREFETCH) - type = "mem pref"; - else - type = "mem"; - else if (res->flags & IORESOURCE_IO) - type = "io"; - else - type = "unknown"; - dev_info(&dev->dev, - "BAR %d: can't assign %s (size %#llx)\n", - resno, type, (unsigned long long) resource_size(res)); - } + /* + * If we failed to assign anything, let's try the address + * where firmware left it. That at least has a chance of + * working, which is better than just leaving it disabled. + */ + if (ret < 0 && dev->fw_addr[resno]) + ret = pci_revert_fw_address(res, dev, resno, size); + if (!ret) { + res->flags &= ~IORESOURCE_STARTALIGN; + dev_info(&dev->dev, "BAR %d: assigned %pR\n", resno, res); + if (resno < PCI_BRIDGE_RESOURCES) + pci_update_resource(dev, resno); + } return ret; } + /* Sort resources by alignment */ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head) { diff --git a/include/linux/pci.h b/include/linux/pci.h index 1ff9bbafd932..8c230cbcbb48 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -813,6 +813,7 @@ int __pci_reset_function(struct pci_dev *dev); int pci_reset_function(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); +int __must_check pci_reassign_resource(struct pci_dev *dev, int i, resource_size_t add_size, resource_size_t align); int pci_select_bars(struct pci_dev *dev, unsigned long flags); /* ROM control related routines */ -- cgit v1.2.3