From 57741a779070e0b141b6148136b420c8d35ccbce Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 15 Feb 2008 01:32:50 -0800 Subject: x86_64: set cfg_size for AMD Family 10h in case MMCONFIG reuse pci_cfg_space_size but skip check pci express and pci-x CAP ID. Signed-off-by: Yinghai Lu Cc: Andrew Morton Acked-by: Greg Kroah-Hartman Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- drivers/pci/probe.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index f991359f0c36..a8efdaef1870 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -842,11 +842,14 @@ static void set_pcie_port_type(struct pci_dev *pdev) * reading the dword at 0x100 which must either be 0 or a valid extended * capability header. */ -int pci_cfg_space_size(struct pci_dev *dev) +int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix) { int pos; u32 status; + if (!check_exp_pcix) + goto skip; + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); if (!pos) { pos = pci_find_capability(dev, PCI_CAP_ID_PCIX); @@ -858,6 +861,7 @@ int pci_cfg_space_size(struct pci_dev *dev) goto fail; } + skip: if (pci_read_config_dword(dev, 256, &status) != PCIBIOS_SUCCESSFUL) goto fail; if (status == 0xffffffff) @@ -869,6 +873,11 @@ int pci_cfg_space_size(struct pci_dev *dev) return PCI_CFG_SPACE_SIZE; } +int pci_cfg_space_size(struct pci_dev *dev) +{ + return pci_cfg_space_size_ext(dev, 1); +} + static void pci_release_bus_bridge_dev(struct device *dev) { kfree(dev); -- cgit v1.2.3 From 0d358f22f6c8f03ab215eee8d52b74f78cc3c7db Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Feb 2008 03:20:41 -0800 Subject: driver core: try parent numa_node at first before using default in the device_add, we try to use the parent numa_node. need to make sure pci root bus's bridge device numa_node is set. then we could use device->numa_node directly for all devices. 
and don't need to call pcibus_to_node(). Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- drivers/base/core.c | 14 ++++++++++++-- drivers/pci/probe.c | 4 +++- 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/base/core.c b/drivers/base/core.c index 9248e0927d08..be288b5e4180 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -787,6 +787,10 @@ int device_add(struct device *dev) parent = get_device(dev->parent); setup_parent(dev, parent); + /* use parent numa_node */ + if (parent) + set_dev_node(dev, dev_to_node(parent)); + /* first, register with generic layer. */ error = kobject_add(&dev->kobj, dev->kobj.parent, "%s", dev->bus_id); if (error) @@ -1306,8 +1310,11 @@ int device_move(struct device *dev, struct device *new_parent) dev->parent = new_parent; if (old_parent) klist_remove(&dev->knode_parent); - if (new_parent) + if (new_parent) { klist_add_tail(&dev->knode_parent, &new_parent->klist_children); + set_dev_node(dev, dev_to_node(new_parent)); + } + if (!dev->class) goto out_put; error = device_move_class_links(dev, old_parent, new_parent); @@ -1317,9 +1324,12 @@ int device_move(struct device *dev, struct device *new_parent) if (!kobject_move(&dev->kobj, &old_parent->kobj)) { if (new_parent) klist_remove(&dev->knode_parent); - if (old_parent) + dev->parent = old_parent; + if (old_parent) { klist_add_tail(&dev->knode_parent, &old_parent->klist_children); + set_dev_node(dev, dev_to_node(old_parent)); + } } cleanup_glue_dir(dev, new_parent_kobj); put_device(new_parent); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index a8efdaef1870..a40043bd3257 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -973,7 +973,6 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) dev->dev.release = pci_release_dev; pci_dev_get(dev); - set_dev_node(&dev->dev, pcibus_to_node(bus)); dev->dev.dma_mask = &dev->dma_mask; dev->dev.dma_parms = &dev->dma_parms; dev->dev.coherent_dma_mask = 
0xffffffffull; @@ -1128,6 +1127,9 @@ struct pci_bus * pci_create_bus(struct device *parent, goto dev_reg_err; b->bridge = get_device(dev); + if (!parent) + set_dev_node(b->bridge, pcibus_to_node(b)); + b->dev.class = &pcibus_class; b->dev.parent = b->bridge; sprintf(b->dev.bus_id, "%04x:%02x", pci_domain_nr(b), bus); -- cgit v1.2.3 From 30a18d6c3f1e774de656ebd8ff219d53e2ba4029 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Feb 2008 03:21:20 -0800 Subject: x86: multi pci root bus with different io resource range, on 64-bit scan AMD opteron io/mmio routing to make sure every pci root bus get correct resource range. Thus later pci scan could assign correct resource to device with unassigned resource. this can fix a system without _CRS for multi pci root bus. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/pci/Makefile_64 | 2 +- arch/x86/pci/k8-bus_64.c | 404 +++++++++++++++++++++++++++++++++++++++------ drivers/pci/probe.c | 6 + include/asm-x86/topology.h | 3 + include/linux/pci.h | 2 +- 5 files changed, 365 insertions(+), 52 deletions(-) (limited to 'drivers/pci') diff --git a/arch/x86/pci/Makefile_64 b/arch/x86/pci/Makefile_64 index 7d8c467bf143..8fbd19832cf6 100644 --- a/arch/x86/pci/Makefile_64 +++ b/arch/x86/pci/Makefile_64 @@ -13,5 +13,5 @@ obj-y += legacy.o irq.o common.o early.o # mmconfig has a 64bit special obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_64.o direct.o mmconfig-shared.o -obj-$(CONFIG_NUMA) += k8-bus_64.o +obj-y += k8-bus_64.o diff --git a/arch/x86/pci/k8-bus_64.c b/arch/x86/pci/k8-bus_64.c index dab38310ee97..5e8a9d105edd 100644 --- a/arch/x86/pci/k8-bus_64.c +++ b/arch/x86/pci/k8-bus_64.c @@ -7,23 +7,29 @@ /* * This discovers the pcibus <-> node mapping on AMD K8. 
- * - * RED-PEN need to call this again on PCI hotplug - * RED-PEN empty cpus get reported wrong + * also get peer root bus resource for io,mmio */ -#define NODE_ID(dword) ((dword>>4) & 0x07) -#define LDT_BUS_NUMBER_REGISTER_0 0xE0 -#define LDT_BUS_NUMBER_REGISTER_1 0xE4 -#define LDT_BUS_NUMBER_REGISTER_2 0xE8 -#define LDT_BUS_NUMBER_REGISTER_3 0xEC -#define NR_LDT_BUS_NUMBER_REGISTERS 4 -#define SECONDARY_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF) -#define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 24) & 0xFF) -#define PCI_DEVICE_ID_K8HTCONFIG 0x1100 -#define PCI_DEVICE_ID_K8_10H_HTCONFIG 0x1200 -#define PCI_DEVICE_ID_K8_11H_HTCONFIG 0x1300 +/* + * sub bus (transparent) will use entres from 3 to store extra from root, + * so need to make sure have enought slot there, increase PCI_BUS_NUM_RESOURCES? + */ +#define RES_NUM 16 +struct pci_root_info { + char name[12]; + unsigned int res_num; + struct resource res[RES_NUM]; + int bus_min; + int bus_max; + int node; + int link; +}; + +/* 4 at this time, it may become to 32 */ +#define PCI_ROOT_NR 4 +static int pci_root_num; +static struct pci_root_info pci_root_info[PCI_ROOT_NR]; #ifdef CONFIG_NUMA @@ -55,77 +61,375 @@ int get_mp_bus_to_node(int busnum) return node; } - #endif +void set_pci_bus_resources_arch_default(struct pci_bus *b) +{ + int i; + int j; + struct pci_root_info *info; + + if (!pci_root_num) + return; + + for (i = 0; i < pci_root_num; i++) { + if (pci_root_info[i].bus_min == b->number) + break; + } + + if (i == pci_root_num) + return; + + info = &pci_root_info[i]; + for (j = 0; j < info->res_num; j++) { + struct resource *res; + struct resource *root; + + res = &info->res[j]; + b->resource[j] = res; + if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + else + root = &iomem_resource; + insert_resource(root, res); + } +} + +#define RANGE_NUM 16 + +struct res_range { + size_t start; + size_t end; +}; + +static void __init update_range(struct res_range *range, size_t start, + size_t end) +{ + 
int i; + int j; + + for (j = 0; j < RANGE_NUM; j++) { + if (!range[j].end) + continue; + if (start == range[j].start && end < range[j].end) { + range[j].start = end + 1; + break; + } else if (start == range[j].start && end == range[j].end) { + range[j].start = 0; + range[j].end = 0; + break; + } else if (start > range[j].start && end == range[j].end) { + range[j].end = start - 1; + break; + } else if (start > range[j].start && end < range[j].end) { + /* find the new spare */ + for (i = 0; i < RANGE_NUM; i++) { + if (range[i].end == 0) + break; + } + if (i < RANGE_NUM) { + range[i].end = range[j].end; + range[i].start = end + 1; + } else { + printk(KERN_ERR "run of slot in ranges\n"); + } + range[j].end = start - 1; + break; + } + } +} + +static void __init update_res(struct pci_root_info *info, size_t start, + size_t end, unsigned long flags, int merge) +{ + int i; + struct resource *res; + + if (!merge) + goto addit; + + /* try to merge it with old one */ + for (i = 0; i < info->res_num; i++) { + res = &info->res[i]; + if (res->flags != flags) + continue; + if (res->end + 1 == start) { + res->end = end; + return; + } else if (end + 1 == res->start) { + res->start = start; + return; + } + } + +addit: + + /* need to add that */ + if (info->res_num >= RES_NUM) + return; + + res = &info->res[info->res_num]; + res->name = info->name; + res->flags = flags; + res->start = start; + res->end = end; + res->child = NULL; + info->res_num++; +} + +struct pci_hostbridge_probe { + u32 bus; + u32 slot; + u32 vendor; + u32 device; +}; + +static struct pci_hostbridge_probe pci_probes[] __initdata = { + { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1100 }, + { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 }, + { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 }, + { 0, 0x18, PCI_VENDOR_ID_AMD, 0x1300 }, +}; + /** * early_fill_mp_bus_to_node() * called before pcibios_scan_root and pci_scan_bus * fills the mp_bus_to_cpumask array based according to the LDT Bus Number * Registers found in the K8 northbridge */ -__init 
static int -early_fill_mp_bus_to_node(void) +static int __init early_fill_mp_bus_info(void) { -#ifdef CONFIG_NUMA - int i, j; + int i; + int j; + unsigned bus; unsigned slot; - u32 ldtbus; - u32 id; + int found; int node; - u16 deviceid; - u16 vendorid; - int min_bus; - int max_bus; - - static int lbnr[NR_LDT_BUS_NUMBER_REGISTERS] = { - LDT_BUS_NUMBER_REGISTER_0, - LDT_BUS_NUMBER_REGISTER_1, - LDT_BUS_NUMBER_REGISTER_2, - LDT_BUS_NUMBER_REGISTER_3 - }; + int link; + int def_node; + int def_link; + struct pci_root_info *info; + u32 reg; + struct resource *res; + size_t start; + size_t end; + struct res_range range[RANGE_NUM]; + u64 val; + u32 address; +#ifdef CONFIG_NUMA for (i = 0; i < BUS_NR; i++) mp_bus_to_node[i] = -1; +#endif if (!early_pci_allowed()) return -1; - slot = 0x18; - id = read_pci_config(0, slot, 0, PCI_VENDOR_ID); + found = 0; + for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { + u32 id; + u16 device; + u16 vendor; - vendorid = id & 0xffff; - if (vendorid != PCI_VENDOR_ID_AMD) - goto out; + bus = pci_probes[i].bus; + slot = pci_probes[i].slot; + id = read_pci_config(bus, slot, 0, PCI_VENDOR_ID); - deviceid = (id>>16) & 0xffff; - if ((deviceid != PCI_DEVICE_ID_K8HTCONFIG) && - (deviceid != PCI_DEVICE_ID_K8_10H_HTCONFIG) && - (deviceid != PCI_DEVICE_ID_K8_11H_HTCONFIG)) - goto out; + vendor = id & 0xffff; + device = (id>>16) & 0xffff; + if (pci_probes[i].vendor == vendor && + pci_probes[i].device == device) { + found = 1; + break; + } + } + + if (!found) + return 0; - for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) { - ldtbus = read_pci_config(0, slot, 1, lbnr[i]); + pci_root_num = 0; + for (i = 0; i < 4; i++) { + int min_bus; + int max_bus; + reg = read_pci_config(bus, slot, 1, 0xe0 + (i << 2)); /* Check if that register is enabled for bus range */ - if ((ldtbus & 7) != 3) + if ((reg & 7) != 3) continue; - min_bus = SECONDARY_LDT_BUS_NUMBER(ldtbus); - max_bus = SUBORDINATE_LDT_BUS_NUMBER(ldtbus); - node = NODE_ID(ldtbus); + min_bus = (reg >> 16) & 
0xff; + max_bus = (reg >> 24) & 0xff; + node = (reg >> 4) & 0x07; +#ifdef CONFIG_NUMA for (j = min_bus; j <= max_bus; j++) mp_bus_to_node[j] = (unsigned char) node; +#endif + link = (reg >> 8) & 0x03; + + info = &pci_root_info[pci_root_num]; + info->bus_min = min_bus; + info->bus_max = max_bus; + info->node = node; + info->link = link; + sprintf(info->name, "PCI Bus #%02x", min_bus); + pci_root_num++; } -out: + /* get the default node and link for left over res */ + reg = read_pci_config(bus, slot, 0, 0x60); + def_node = (reg >> 8) & 0x07; + reg = read_pci_config(bus, slot, 0, 0x64); + def_link = (reg >> 8) & 0x03; + + memset(range, 0, sizeof(range)); + range[0].end = 0xffff; + /* io port resource */ + for (i = 0; i < 4; i++) { + reg = read_pci_config(bus, slot, 1, 0xc0 + (i << 3)); + if (!(reg & 3)) + continue; + + start = reg & 0xfff000; + reg = read_pci_config(bus, slot, 1, 0xc4 + (i << 3)); + node = reg & 0x07; + link = (reg >> 4) & 0x03; + end = (reg & 0xfff000) | 0xfff; + + /* find the position */ + for (j = 0; j < pci_root_num; j++) { + info = &pci_root_info[j]; + if (info->node == node && info->link == link) + break; + } + if (j == pci_root_num) + continue; /* not found */ + + info = &pci_root_info[j]; + update_res(info, start, end, IORESOURCE_IO, 0); + update_range(range, start, end); + } + /* add left over io port range to def node/link, [0, 0xffff] */ + /* find the position */ + for (j = 0; j < pci_root_num; j++) { + info = &pci_root_info[j]; + if (info->node == def_node && info->link == def_link) + break; + } + if (j < pci_root_num) { + info = &pci_root_info[j]; + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; + + update_res(info, range[i].start, range[i].end, + IORESOURCE_IO, 1); + } + } + + memset(range, 0, sizeof(range)); + /* 0xfd00000000-0xffffffffff for HT */ + /* 0xfc00000000-0xfcffffffff for Family 10h mmconfig*/ + range[0].end = 0xfbffffffffULL; + + /* need to take out [0, TOM) for RAM*/ + address = MSR_K8_TOP_MEM1; + 
rdmsrl(address, val); + end = (val & 0xffffff8000000ULL); + printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20); + if (end < (1ULL<<32)) + update_range(range, 0, end - 1); + + /* mmio resource */ + for (i = 0; i < 8; i++) { + reg = read_pci_config(bus, slot, 1, 0x80 + (i << 3)); + if (!(reg & 3)) + continue; + + start = reg & 0xffffff00; /* 39:16 on 31:8*/ + start <<= 8; + reg = read_pci_config(bus, slot, 1, 0x84 + (i << 3)); + node = reg & 0x07; + link = (reg >> 4) & 0x03; + end = (reg & 0xffffff00); + end <<= 8; + end |= 0xffff; + + /* find the position */ + for (j = 0; j < pci_root_num; j++) { + info = &pci_root_info[j]; + if (info->node == node && info->link == link) + break; + } + if (j == pci_root_num) + continue; /* not found */ + + info = &pci_root_info[j]; + update_res(info, start, end, IORESOURCE_MEM, 0); + update_range(range, start, end); + } + + /* need to take out [4G, TOM2) for RAM*/ + /* SYS_CFG */ + address = MSR_K8_SYSCFG; + rdmsrl(address, val); + /* TOP_MEM2 is enabled? */ + if (val & (1<<21)) { + /* TOP_MEM2 */ + address = MSR_K8_TOP_MEM2; + rdmsrl(address, val); + end = (val & 0xffffff8000000ULL); + printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20); + update_range(range, 1ULL<<32, end - 1); + } + + /* + * add left over mmio range to def node/link ? 
+ * that is tricky, just record range in from start_min to 4G + */ + for (j = 0; j < pci_root_num; j++) { + info = &pci_root_info[j]; + if (info->node == def_node && info->link == def_link) + break; + } + if (j < pci_root_num) { + info = &pci_root_info[j]; + + for (i = 0; i < RANGE_NUM; i++) { + if (!range[i].end) + continue; + + update_res(info, range[i].start, range[i].end, + IORESOURCE_MEM, 1); + } + } + +#ifdef CONFIG_NUMA for (i = 0; i < BUS_NR; i++) { node = mp_bus_to_node[i]; if (node >= 0) printk(KERN_DEBUG "bus: %02x to node: %02x\n", i, node); } #endif + + for (i = 0; i < pci_root_num; i++) { + int res_num; + int busnum; + + info = &pci_root_info[i]; + res_num = info->res_num; + busnum = info->bus_min; + printk(KERN_DEBUG "bus: [%02x,%02x] on node %x link %x\n", + info->bus_min, info->bus_max, info->node, info->link); + for (j = 0; j < res_num; j++) { + res = &info->res[j]; + printk(KERN_DEBUG "bus: %02x index %x %s: [%llx, %llx]\n", + busnum, j, + (res->flags & IORESOURCE_IO)?"io port":"mmio", + res->start, res->end); + } + } + return 0; } -postcore_initcall(early_fill_mp_bus_to_node); +postcore_initcall(early_fill_mp_bus_info); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index a40043bd3257..4a55bf380957 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1088,6 +1088,10 @@ unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) return max; } +void __attribute__((weak)) set_pci_bus_resources_arch_default(struct pci_bus *b) +{ +} + struct pci_bus * pci_create_bus(struct device *parent, int bus, struct pci_ops *ops, void *sysdata) { @@ -1147,6 +1151,8 @@ struct pci_bus * pci_create_bus(struct device *parent, b->resource[0] = &ioport_resource; b->resource[1] = &iomem_resource; + set_pci_bus_resources_arch_default(b); + return b; dev_create_file_err: diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h index 4793ae745a78..0e6d6b03affe 100644 --- a/include/asm-x86/topology.h +++ b/include/asm-x86/topology.h @@ 
-193,6 +193,9 @@ extern cpumask_t cpu_coregroup_map(int cpu); #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) #endif +struct pci_bus; +void set_pci_bus_resources_arch_default(struct pci_bus *b); + #ifdef CONFIG_SMP #define mc_capable() (boot_cpu_data.x86_max_cores > 1) #define smt_capable() (smp_num_siblings > 1) diff --git a/include/linux/pci.h b/include/linux/pci.h index a71954a38932..abc998ffb66e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -254,7 +254,7 @@ static inline void pci_add_saved_cap(struct pci_dev *pci_dev, #define PCI_NUM_RESOURCES 11 #ifndef PCI_BUS_NUM_RESOURCES -#define PCI_BUS_NUM_RESOURCES 8 +#define PCI_BUS_NUM_RESOURCES 16 #endif #define PCI_REGION_FLAG_MASK 0x0fU /* These bits of resource flags tell us the PCI region flags */ -- cgit v1.2.3