summary refs log tree commit diff
diff options
context:
space:
mode:
author Jan Beulich <jbeulich@novell.com> 2006-03-24 03:15:54 -0800
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-03-24 07:33:21 -0800
commit a4a6198b80cf82eb8160603c98da218d1bd5e104 (patch)
tree 8c59e9088840b6b95e46c00ddda4fd7a134154c2
parent c98d8cfbc600af88e9e6cffc84dd342280445760 (diff)
download lwn-a4a6198b80cf82eb8160603c98da218d1bd5e104.tar.gz
lwn-a4a6198b80cf82eb8160603c98da218d1bd5e104.zip
[PATCH] tvec_bases too large for per-cpu data
With internal Xen-enabled kernels we see the kernel's static per-cpu data area exceed the limit of 32k on x86-64, and even native x86-64 kernels get fairly close to that limit. I generally question whether it is reasonable to have data structures several kb in size allocated as per-cpu data when the space there is rather limited.

The biggest arch-independent consumer is tvec_bases (over 4k on 32-bit archs, over 8k on 64-bit ones), which now gets converted to use dynamically allocated memory instead.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- kernel/timer.c 45
1 files changed, 34 insertions, 11 deletions
diff --git a/kernel/timer.c b/kernel/timer.c
index 2410c18dbeb1..4427e725ccdd 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -86,7 +86,8 @@ struct tvec_t_base_s {
} ____cacheline_aligned_in_smp;
typedef struct tvec_t_base_s tvec_base_t;
-static DEFINE_PER_CPU(tvec_base_t, tvec_bases);
+static DEFINE_PER_CPU(tvec_base_t *, tvec_bases);
+static tvec_base_t boot_tvec_bases;
static inline void set_running_timer(tvec_base_t *base,
struct timer_list *timer)
@@ -157,7 +158,7 @@ EXPORT_SYMBOL(__init_timer_base);
void fastcall init_timer(struct timer_list *timer)
{
timer->entry.next = NULL;
- timer->base = &per_cpu(tvec_bases, raw_smp_processor_id()).t_base;
+ timer->base = &per_cpu(tvec_bases, raw_smp_processor_id())->t_base;
}
EXPORT_SYMBOL(init_timer);
@@ -218,7 +219,7 @@ int __mod_timer(struct timer_list *timer, unsigned long expires)
ret = 1;
}
- new_base = &__get_cpu_var(tvec_bases);
+ new_base = __get_cpu_var(tvec_bases);
if (base != &new_base->t_base) {
/*
@@ -258,7 +259,7 @@ EXPORT_SYMBOL(__mod_timer);
*/
void add_timer_on(struct timer_list *timer, int cpu)
{
- tvec_base_t *base = &per_cpu(tvec_bases, cpu);
+ tvec_base_t *base = per_cpu(tvec_bases, cpu);
unsigned long flags;
BUG_ON(timer_pending(timer) || !timer->function);
@@ -504,7 +505,7 @@ unsigned long next_timer_interrupt(void)
}
hr_expires += jiffies;
- base = &__get_cpu_var(tvec_bases);
+ base = __get_cpu_var(tvec_bases);
spin_lock(&base->t_base.lock);
expires = base->timer_jiffies + (LONG_MAX >> 1);
list = NULL;
@@ -901,7 +902,7 @@ EXPORT_SYMBOL(xtime_lock);
*/
static void run_timer_softirq(struct softirq_action *h)
{
- tvec_base_t *base = &__get_cpu_var(tvec_bases);
+ tvec_base_t *base = __get_cpu_var(tvec_bases);
hrtimer_run_queues();
if (time_after_eq(jiffies, base->timer_jiffies))
@@ -1256,12 +1257,32 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info)
return 0;
}
-static void __devinit init_timers_cpu(int cpu)
+static int __devinit init_timers_cpu(int cpu)
{
int j;
tvec_base_t *base;
- base = &per_cpu(tvec_bases, cpu);
+ base = per_cpu(tvec_bases, cpu);
+ if (!base) {
+ static char boot_done;
+
+ /*
+ * Cannot do allocation in init_timers as that runs before the
+ * allocator initializes (and would waste memory if there are
+ * more possible CPUs than will ever be installed/brought up).
+ */
+ if (boot_done) {
+ base = kmalloc_node(sizeof(*base), GFP_KERNEL,
+ cpu_to_node(cpu));
+ if (!base)
+ return -ENOMEM;
+ memset(base, 0, sizeof(*base));
+ } else {
+ base = &boot_tvec_bases;
+ boot_done = 1;
+ }
+ per_cpu(tvec_bases, cpu) = base;
+ }
spin_lock_init(&base->t_base.lock);
for (j = 0; j < TVN_SIZE; j++) {
INIT_LIST_HEAD(base->tv5.vec + j);
@@ -1273,6 +1294,7 @@ static void __devinit init_timers_cpu(int cpu)
INIT_LIST_HEAD(base->tv1.vec + j);
base->timer_jiffies = jiffies;
+ return 0;
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -1295,8 +1317,8 @@ static void __devinit migrate_timers(int cpu)
int i;
BUG_ON(cpu_online(cpu));
- old_base = &per_cpu(tvec_bases, cpu);
- new_base = &get_cpu_var(tvec_bases);
+ old_base = per_cpu(tvec_bases, cpu);
+ new_base = get_cpu_var(tvec_bases);
local_irq_disable();
spin_lock(&new_base->t_base.lock);
@@ -1326,7 +1348,8 @@ static int __devinit timer_cpu_notify(struct notifier_block *self,
long cpu = (long)hcpu;
switch(action) {
case CPU_UP_PREPARE:
- init_timers_cpu(cpu);
+ if (init_timers_cpu(cpu) < 0)
+ return NOTIFY_BAD;
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_DEAD: