From 681ee44d40d7c93b42118320e4620d07d8704fd6 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 9 Feb 2010 18:01:44 -0800 Subject: x86, apic: Don't use logical-flat mode when CPU hotplug may exceed 8 CPUs We need to fall back from logical-flat APIC mode to physical-flat mode when we have more than 8 CPUs. However, in the presence of CPU hotplug(with bios listing not enabled but possible cpus as disabled cpus in MADT), we have to consider the number of possible CPUs rather than the number of current CPUs; otherwise we may cross the 8-CPU boundary when CPUs are added later. 32bit apic code can use more cleanups (like the removal of vendor checks in 32bit default_setup_apic_routing()) and more unifications with 64bit code. Yinghai has some patches in works already. This patch addresses the boot issue that is reported in the virtualization guest context. [ hpa: incorporated function annotation feedback from Yinghai Lu ] Signed-off-by: Suresh Siddha LKML-Reference: <1265767304.2833.19.camel@sbs-t61.sc.intel.com> Acked-by: Shaohui Zheng Reviewed-by: Yinghai Lu Cc: Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/apic.c | 17 ----------------- arch/x86/kernel/apic/probe_32.c | 29 +++++++++++++++++++++++++++-- arch/x86/kernel/apic/probe_64.c | 2 +- 3 files changed, 28 insertions(+), 20 deletions(-) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 3987e4408f75..dfca210f6a10 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1641,9 +1641,7 @@ int __init APIC_init_uniprocessor(void) #endif enable_IR_x2apic(); -#ifdef CONFIG_X86_64 default_setup_apic_routing(); -#endif verify_local_APIC(); connect_bsp_APIC(); @@ -1891,21 +1889,6 @@ void __cpuinit generic_processor_info(int apicid, int version) if (apicid > max_physical_apicid) max_physical_apicid = apicid; -#ifdef CONFIG_X86_32 - if (num_processors > 8) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (!APIC_XAPIC(version)) { - def_to_bigsmp = 0; - break; - } - /* If P4 and above fall through */ - case X86_VENDOR_AMD: - def_to_bigsmp = 1; - } - } -#endif - #if defined(CONFIG_SMP) || defined(CONFIG_X86_64) early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 1a6559f6768c..99d2fe016084 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -52,7 +52,32 @@ static int __init print_ipi_mode(void) } late_initcall(print_ipi_mode); -void default_setup_apic_routing(void) +void __init default_setup_apic_routing(void) +{ + int version = apic_version[boot_cpu_physical_apicid]; + + if (num_possible_cpus() > 8) { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + if (!APIC_XAPIC(version)) { + def_to_bigsmp = 0; + break; + } + /* If P4 and above fall through */ + case X86_VENDOR_AMD: + def_to_bigsmp = 1; + } + } + +#ifdef CONFIG_X86_BIGSMP + generic_bigsmp_probe(); +#endif + + if (apic->setup_apic_routing) + apic->setup_apic_routing(); +} + +static void setup_apic_flat_routing(void) { #ifdef CONFIG_X86_IO_APIC printk(KERN_INFO @@ -103,7 +128,7 @@ struct apic apic_default = { .init_apic_ldr = default_init_apic_ldr, .ioapic_phys_id_map = default_ioapic_phys_id_map, - .setup_apic_routing = default_setup_apic_routing, + .setup_apic_routing = setup_apic_flat_routing, .multi_timer_check = NULL, .apicid_to_node = default_apicid_to_node, .cpu_to_logical_apicid = default_cpu_to_logical_apicid, diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 450fe2064a14..83e9be4778e2 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -67,7 +67,7 @@ void __init default_setup_apic_routing(void) } #endif - if (apic == &apic_flat && num_processors > 8) + if (apic == &apic_flat && num_possible_cpus() > 8) apic = &apic_physflat; printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); -- cgit v1.2.3 From 18dce6ba5c8c6bd0f3ab4efa4cbdd698dab5c40a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 10 Feb 2010 01:20:05 -0800 Subject: x86: Fix SCI on IOAPIC != 0 Thomas Renninger reported on IBM x3330 booting a latest kernel on this machine results in: PCI: PCI BIOS revision 2.10 entry at 0xfd61c, last bus=1 PCI: Using configuration type 1 for base access bio: create slab at 0 ACPI: SCI (IRQ30) allocation failed ACPI Exception: AE_NOT_ACQUIRED, Unable to install System Control Interrupt handler (20090903/evevent-161) ACPI: Unable to start the ACPI Interpreter Later all kind of devices fail... and bisect it down to this commit: commit b9c61b70075c87a8612624736faf4a2de5b1ed30 x86/pci: update pirq_enable_irq() to setup io apic routing it turns out we need to set irq routing for the sci on ioapic1 early. -v2: make it work without sparseirq too. -v3: fix checkpatch.pl warning, and cc to stable Reported-by: Thomas Renninger Bisected-by: Thomas Renninger Tested-by: Thomas Renninger Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-2-git-send-email-yinghai@kernel.org> Cc: stable@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/io_apic.h | 1 + arch/x86/kernel/acpi/boot.c | 9 +++++++- arch/x86/kernel/apic/io_apic.c | 50 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 7c7c16cde1f8..5f61f6e0ffdd 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -160,6 +160,7 @@ extern int io_apic_get_redir_entries(int ioapic); struct io_apic_irq_attr; extern int io_apic_set_pci_routing(struct device *dev, int irq, struct io_apic_irq_attr *irq_attr); +void setup_IO_APIC_irq_extra(u32 gsi); extern int (*ioapic_renumber_irq)(int ioapic, int irq); extern void ioapic_init_mappings(void); extern void ioapic_insert_resources(void); diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 0acbcdfa5ca4..5c96b75c6ea8 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -446,6 +446,12 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) { *irq = gsi; + +#ifdef CONFIG_X86_IO_APIC + if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) + setup_IO_APIC_irq_extra(gsi); +#endif + return 0; } @@ -473,7 +479,8 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity); } #endif - acpi_gsi_to_irq(plat_gsi, &irq); + irq = plat_gsi; + return irq; } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 53243ca7816d..5e4cce254e43 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1538,6 +1538,56 @@ static void __init setup_IO_APIC_irqs(void) " (apicid-pin) not connected\n"); } +/* + * for the gsit that is not in first ioapic + * but could not use acpi_register_gsi() + * like some special sci in IBM x3330 + */ +void setup_IO_APIC_irq_extra(u32 gsi) +{ + int apic_id = 0, pin, idx, irq; + int node = cpu_to_node(boot_cpu_id); + struct irq_desc *desc; + struct irq_cfg *cfg; + + /* + * Convert 'gsi' to 'ioapic.pin'. + */ + apic_id = mp_find_ioapic(gsi); + if (apic_id < 0) + return; + + pin = mp_find_ioapic_pin(apic_id, gsi); + idx = find_irq_entry(apic_id, pin, mp_INT); + if (idx == -1) + return; + + irq = pin_2_irq(idx, apic_id, pin); +#ifdef CONFIG_SPARSE_IRQ + desc = irq_to_desc(irq); + if (desc) + return; +#endif + desc = irq_to_desc_alloc_node(irq, node); + if (!desc) { + printk(KERN_INFO "can not get irq_desc for %d\n", irq); + return; + } + + cfg = desc->chip_data; + add_pin_to_irq_node(cfg, node, apic_id, pin); + + if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) { + pr_debug("Pin %d-%d already programmed\n", + mp_ioapics[apic_id].apicid, pin); + return; + } + set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed); + + setup_IO_APIC_irq(apic_id, pin, irq, desc, + irq_trigger(idx), irq_polarity(idx)); +} + /* * Set up the timer pin, possibly with the 8259A-master behind. */ -- cgit v1.2.3 From eb5b3794062824ba12d883901eea49ea89d0a678 Mon Sep 17 00:00:00 2001 From: Brandon Philips Date: Sun, 7 Feb 2010 13:02:50 -0800 Subject: x86, irq: Keep chip_data in create_irq_nr and destroy_irq Version 4: use get_irq_chip_data() in destroy_irq() to get rid of some local vars. When two drivers are setting up MSI-X at the same time via pci_enable_msix() there is a race. See this dmesg excerpt: [ 85.170610] ixgbe 0000:02:00.1: irq 97 for MSI/MSI-X [ 85.170611] alloc irq_desc for 99 on node -1 [ 85.170613] igb 0000:08:00.1: irq 98 for MSI/MSI-X [ 85.170614] alloc kstat_irqs on node -1 [ 85.170616] alloc irq_2_iommu on node -1 [ 85.170617] alloc irq_desc for 100 on node -1 [ 85.170619] alloc kstat_irqs on node -1 [ 85.170621] alloc irq_2_iommu on node -1 [ 85.170625] ixgbe 0000:02:00.1: irq 99 for MSI/MSI-X [ 85.170626] alloc irq_desc for 101 on node -1 [ 85.170628] igb 0000:08:00.1: irq 100 for MSI/MSI-X [ 85.170630] alloc kstat_irqs on node -1 [ 85.170631] alloc irq_2_iommu on node -1 [ 85.170635] alloc irq_desc for 102 on node -1 [ 85.170636] alloc kstat_irqs on node -1 [ 85.170639] alloc irq_2_iommu on node -1 [ 85.170646] BUG: unable to handle kernel NULL pointer dereference at 0000000000000088 As you can see igb and ixgbe are both alternating on create_irq_nr() via pci_enable_msix() in their probe function. ixgbe: While looping through irq_desc_ptrs[] via create_irq_nr() ixgbe choses irq_desc_ptrs[102] and exits the loop, drops vector_lock and calls dynamic_irq_init. Then it sets irq_desc_ptrs[102]->chip_data = NULL via dynamic_irq_init(). igb: Grabs the vector_lock now and starts looping over irq_desc_ptrs[] via create_irq_nr(). It gets to irq_desc_ptrs[102] and does this: cfg_new = irq_desc_ptrs[102]->chip_data; if (cfg_new->vector != 0) continue; This hits the NULL deref. Another possible race exists via pci_disable_msix() in a driver or in the number of error paths that call free_msi_irqs(): destroy_irq() dynamic_irq_cleanup() which sets desc->chip_data = NULL ...race window... desc->chip_data = cfg; Remove the save and restore code for cfg in create_irq_nr() and destroy_irq() and take the desc->lock when checking the irq_cfg. Reported-and-analyzed-by: Brandon Philips Signed-off-by: Yinghai Lu LKML-Reference: <20100207210250.GB8256@jenkins.home.ifup.org> Signed-off-by: Brandon Phiilps Cc: stable@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/io_apic.c | 20 ++++------------ include/linux/irq.h | 2 ++ kernel/irq/chip.c | 52 ++++++++++++++++++++++++++++++++++-------- 3 files changed, 50 insertions(+), 24 deletions(-) (limited to 'arch/x86/kernel/apic') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 5e4cce254e43..e93a76bc8670 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3278,12 +3278,9 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) } spin_unlock_irqrestore(&vector_lock, flags); - if (irq > 0) { - dynamic_irq_init(irq); - /* restore it, in case dynamic_irq_init clear it */ - if (desc_new) - desc_new->chip_data = cfg_new; - } + if (irq > 0) + dynamic_irq_init_keep_chip_data(irq); + return irq; } @@ -3305,19 +3302,12 @@ int create_irq(void) void destroy_irq(unsigned int irq) { unsigned long flags; - struct irq_cfg *cfg; - struct irq_desc *desc; - /* store it, in case dynamic_irq_cleanup clear it */ - desc = irq_to_desc(irq); - cfg = desc->chip_data; - dynamic_irq_cleanup(irq); - /* connect back irq_cfg */ - desc->chip_data = cfg; + dynamic_irq_cleanup_keep_chip_data(irq); free_irte(irq); spin_lock_irqsave(&vector_lock, flags); - __clear_irq_vector(irq, cfg); + __clear_irq_vector(irq, get_irq_chip_data(irq)); spin_unlock_irqrestore(&vector_lock, flags); } diff --git a/include/linux/irq.h b/include/linux/irq.h index 451481c082b5..4d9b26e044bc 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -400,7 +400,9 @@ static inline int irq_has_action(unsigned int irq) /* Dynamic irq helper functions */ extern void dynamic_irq_init(unsigned int irq); +void dynamic_irq_init_keep_chip_data(unsigned int irq); extern void dynamic_irq_cleanup(unsigned int irq); +void dynamic_irq_cleanup_keep_chip_data(unsigned int irq); /* Set/get chip/data for an IRQ: */ extern int set_irq_chip(unsigned int irq, struct irq_chip *chip); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index ecc3fa28f666..d70394f12ee9 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -18,11 +18,7 @@ #include "internals.h" -/** - * dynamic_irq_init - initialize a dynamically allocated irq - * @irq: irq number to initialize - */ -void dynamic_irq_init(unsigned int irq) +static void dynamic_irq_init_x(unsigned int irq, bool keep_chip_data) { struct irq_desc *desc; unsigned long flags; @@ -41,7 +37,8 @@ void dynamic_irq_init(unsigned int irq) desc->depth = 1; desc->msi_desc = NULL; desc->handler_data = NULL; - desc->chip_data = NULL; + if (!keep_chip_data) + desc->chip_data = NULL; desc->action = NULL; desc->irq_count = 0; desc->irqs_unhandled = 0; @@ -55,10 +52,26 @@ void dynamic_irq_init(unsigned int irq) } /** - * dynamic_irq_cleanup - cleanup a dynamically allocated irq + * dynamic_irq_init - initialize a dynamically allocated irq * @irq: irq number to initialize */ -void dynamic_irq_cleanup(unsigned int irq) +void dynamic_irq_init(unsigned int irq) +{ + dynamic_irq_init_x(irq, false); +} + +/** + * dynamic_irq_init_keep_chip_data - initialize a dynamically allocated irq + * @irq: irq number to initialize + * + * does not set irq_to_desc(irq)->chip_data to NULL + */ +void dynamic_irq_init_keep_chip_data(unsigned int irq) +{ + dynamic_irq_init_x(irq, true); +} + +static void dynamic_irq_cleanup_x(unsigned int irq, bool keep_chip_data) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; @@ -77,7 +90,8 @@ void dynamic_irq_cleanup(unsigned int irq) } desc->msi_desc = NULL; desc->handler_data = NULL; - desc->chip_data = NULL; + if (!keep_chip_data) + desc->chip_data = NULL; desc->handle_irq = handle_bad_irq; desc->chip = &no_irq_chip; desc->name = NULL; @@ -85,6 +99,26 @@ void dynamic_irq_cleanup(unsigned int irq) raw_spin_unlock_irqrestore(&desc->lock, flags); } +/** + * dynamic_irq_cleanup - cleanup a dynamically allocated irq + * @irq: irq number to initialize + */ +void dynamic_irq_cleanup(unsigned int irq) +{ + dynamic_irq_cleanup_x(irq, false); +} + +/** + * dynamic_irq_cleanup_keep_chip_data - cleanup a dynamically allocated irq + * @irq: irq number to initialize + * + * does not set irq_to_desc(irq)->chip_data to NULL + */ +void dynamic_irq_cleanup_keep_chip_data(unsigned int irq) +{ + dynamic_irq_cleanup_x(irq, true); +} + /** * set_irq_chip - set the irq chip for an irq -- cgit v1.2.3