From 5f7865f3e44db4c73fdc454fb2af40806212a7ca Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Tue, 25 Sep 2012 21:12:30 +0800 Subject: sched: Ensure 'sched_domains_numa_levels' is safe to use in other functions We should temporarily reset 'sched_domains_numa_levels' to 0 after it is reset to 'level' in sched_init_numa(). If it fails to allocate memory for array sched_domains_numa_masks[][], the array will contain less then 'level' members. This could be dangerous when we use it to iterate array sched_domains_numa_masks[][] in other functions. This patch set sched_domains_numa_levels to 0 before initializing array sched_domains_numa_masks[][], and reset it to 'level' when sched_domains_numa_masks[][] is fully initialized. Signed-off-by: Tang Chen Signed-off-by: Wen Congyang Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1348578751-16904-2-git-send-email-tangchen@cn.fujitsu.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'kernel/sched') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c17747236438..f895fdd32c5b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6122,6 +6122,17 @@ static void sched_init_numa(void) * numbers. */ + /* + * Here, we should temporarily reset sched_domains_numa_levels to 0. + * If it fails to allocate memory for array sched_domains_numa_masks[][], + * the array will contain less then 'level' members. This could be + * dangerous when we use it to iterate array sched_domains_numa_masks[][] + * in other functions. + * + * We reset it to 'level' at the end of this function. + */ + sched_domains_numa_levels = 0; + sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL); if (!sched_domains_numa_masks) return; @@ -6176,6 +6187,8 @@ static void sched_init_numa(void) } sched_domain_topology = tl; + + sched_domains_numa_levels = level; } #else static inline void sched_init_numa(void) -- cgit v1.2.3 From 301a5cba2887d1f640e6d5184b05a6d7132017d5 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Tue, 25 Sep 2012 21:12:31 +0800 Subject: sched: Update sched_domains_numa_masks[][] when new cpus are onlined Once array sched_domains_numa_masks[] []is defined, it is never updated. When a new cpu on a new node is onlined, the coincident member in sched_domains_numa_masks[][] is not initialized, and all the masks are 0. As a result, the build_overlap_sched_groups() will initialize a NULL sched_group for the new cpu on the new node, which will lead to kernel panic: [ 3189.403280] Call Trace: [ 3189.403286] [] warn_slowpath_common+0x7f/0xc0 [ 3189.403289] [] warn_slowpath_null+0x1a/0x20 [ 3189.403292] [] build_sched_domains+0x467/0x470 [ 3189.403296] [] partition_sched_domains+0x307/0x510 [ 3189.403299] [] ? partition_sched_domains+0x142/0x510 [ 3189.403305] [] cpuset_update_active_cpus+0x83/0x90 [ 3189.403308] [] cpuset_cpu_active+0x38/0x70 [ 3189.403316] [] notifier_call_chain+0x67/0x150 [ 3189.403320] [] ? native_cpu_up+0x18a/0x1b5 [ 3189.403328] [] __raw_notifier_call_chain+0xe/0x10 [ 3189.403333] [] __cpu_notify+0x20/0x40 [ 3189.403337] [] _cpu_up+0xe9/0x131 [ 3189.403340] [] cpu_up+0xdb/0xee [ 3189.403348] [] store_online+0x9c/0xd0 [ 3189.403355] [] dev_attr_store+0x20/0x30 [ 3189.403361] [] sysfs_write_file+0xa3/0x100 [ 3189.403368] [] vfs_write+0xd0/0x1a0 [ 3189.403371] [] sys_write+0x54/0xa0 [ 3189.403375] [] system_call_fastpath+0x16/0x1b [ 3189.403377] ---[ end trace 1e6cf85d0859c941 ]--- [ 3189.403398] BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 This patch registers a new notifier for cpu hotplug notify chain, and updates sched_domains_numa_masks every time a new cpu is onlined or offlined. Signed-off-by: Tang Chen Signed-off-by: Wen Congyang [ fixed compile warning ] Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1348578751-16904-3-git-send-email-tangchen@cn.fujitsu.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'kernel/sched') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f895fdd32c5b..8322d73b4392 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6190,10 +6190,65 @@ static void sched_init_numa(void) sched_domains_numa_levels = level; } + +static void sched_domains_numa_masks_set(int cpu) +{ + int i, j; + int node = cpu_to_node(cpu); + + for (i = 0; i < sched_domains_numa_levels; i++) { + for (j = 0; j < nr_node_ids; j++) { + if (node_distance(j, node) <= sched_domains_numa_distance[i]) + cpumask_set_cpu(cpu, sched_domains_numa_masks[i][j]); + } + } +} + +static void sched_domains_numa_masks_clear(int cpu) +{ + int i, j; + for (i = 0; i < sched_domains_numa_levels; i++) { + for (j = 0; j < nr_node_ids; j++) + cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]); + } +} + +/* + * Update sched_domains_numa_masks[level][node] array when new cpus + * are onlined. + */ +static int sched_domains_numa_masks_update(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + int cpu = (long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + sched_domains_numa_masks_set(cpu); + break; + + case CPU_DEAD: + sched_domains_numa_masks_clear(cpu); + break; + + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} #else static inline void sched_init_numa(void) { } + +static int sched_domains_numa_masks_update(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + return 0; +} #endif /* CONFIG_NUMA */ static int __sdt_alloc(const struct cpumask *cpu_map) @@ -6642,6 +6697,7 @@ void __init sched_init_smp(void) mutex_unlock(&sched_domains_mutex); put_online_cpus(); + hotcpu_notifier(sched_domains_numa_masks_update, CPU_PRI_SCHED_ACTIVE); hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE); hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE); -- cgit v1.2.3