From ca7e716c7833aeaeb8fedd6d004c5f5d5e14d325 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 14 Dec 2008 15:46:01 -0800 Subject: Revert "sched_clock: prevent scd->clock from moving backwards" This reverts commit 5b7dba4ff834259a5623e03a565748704a8fe449, which caused a regression in hibernate, reported by and bisected by Fabio Comolli. This revert fixes http://bugzilla.kernel.org/show_bug.cgi?id=12155 http://bugzilla.kernel.org/show_bug.cgi?id=12149 Bisected-by: Fabio Comolli Requested-by: Rafael J. Wysocki Acked-by: Dave Kleikamp Cc: Peter Zijlstra Cc: Ingo Molnar Signed-off-by: Linus Torvalds --- kernel/sched_clock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index 81787248b60f..e8ab096ddfe3 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c @@ -118,13 +118,13 @@ static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now) /* * scd->clock = clamp(scd->tick_gtod + delta, - * max(scd->tick_gtod, scd->clock), - * max(scd->clock, scd->tick_gtod + TICK_NSEC)); + * max(scd->tick_gtod, scd->clock), + * scd->tick_gtod + TICK_NSEC); */ clock = scd->tick_gtod + delta; min_clock = wrap_max(scd->tick_gtod, scd->clock); - max_clock = wrap_max(scd->clock, scd->tick_gtod + TICK_NSEC); + max_clock = scd->tick_gtod + TICK_NSEC; clock = wrap_max(clock, min_clock); clock = wrap_min(clock, max_clock); -- cgit v1.2.3 From 307257cf475aac25db30b669987f13d90c934e3a Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Mon, 15 Dec 2008 13:54:22 -0800 Subject: cgroups: fix a race between rmdir and remount When a cgroup is removed, it's unlinked from its parent's children list, but not actually freed until the last dentry on it is released (at which point cgrp->root->number_of_cgroups is decremented). Currently rebind_subsystems checks for the top cgroup's child list being empty in order to rebind subsystems into or out of a hierarchy - this can result in the set of subsystems bound to a hierarchy being removed-but-not-freed cgroup. The simplest fix for this is to forbid remounts that change the set of subsystems on a hierarchy that has removed-but-not-freed cgroups. This bug can be reproduced via: mkdir /mnt/cg mount -t cgroup -o ns,freezer cgroup /mnt/cg mkdir /mnt/cg/foo sleep 1h < /mnt/cg/foo & rmdir /mnt/cg/foo mount -t cgroup -o remount,ns,devices,freezer cgroup /mnt/cg kill $! Though the above will cause oops in -mm only but not mainline, but the bug can cause memory leak in mainline (and even oops) Signed-off-by: Paul Menage Reviewed-by: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index fe00b3b983a8..8185a0f09594 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -702,7 +702,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, * any child cgroups exist. This is theoretically supportable * but involves complex error handling, so it's being left until * later */ - if (!list_empty(&cgrp->children)) + if (root->number_of_cgroups > 1) return -EBUSY; /* Process each subsystem */ -- cgit v1.2.3 From 80f40ee4a07530cc3acbc239a9299ec47025825b Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 15 Dec 2008 11:56:48 +0530 Subject: sched: use RCU variant of list traversal in for_each_leaf_rt_rq() Impact: fix potential of rare crash for_each_leaf_rt_rq() walks an RCU protected list (rq->leaf_rt_rq_list), but doesn't use list_for_each_entry_rcu(). Fix this. Signed-off-by: Bharata B Rao Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched_rt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index d9ba9d5f99d6..7bdf84c85ccd 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -77,7 +77,7 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq) } #define for_each_leaf_rt_rq(rt_rq, rq) \ - list_for_each_entry(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list) + list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list) static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq) { -- cgit v1.2.3 From 3d44cc3e01ee1b40317f79ed54324e25c4f848df Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 20 Dec 2008 21:27:34 +0100 Subject: Null pointer deref with hrtimer_try_to_cancel() Impact: Prevent kernel crash with posix timer clockid CLOCK_MONOTONIC_RAW commit 2d42244ae71d6c7b0884b5664cf2eda30fb2ae68 (clocksource: introduce CLOCK_MONOTONIC_RAW) introduced a new clockid, which is only available to read out the raw not NTP adjusted system time. The above commit did not prevent that a posix timer can be created with that clockid. The timer_create() syscall succeeds and initializes the timer to a non existing hrtimer base. When the timer is deleted either by timer_delete() or by the exit() cleanup the kernel crashes. Prevent the creation of timers for CLOCK_MONOTONIC_RAW by setting the posix clock function to no_timer_create which returns an error code. Reported-and-tested-by: Eric Sesterhenn Signed-off-by: Thomas Gleixner Acked-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/posix-timers.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel') diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 5e79c662294b..a140e44eebba 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -197,6 +197,11 @@ static int common_timer_create(struct k_itimer *new_timer) return 0; } +static int no_timer_create(struct k_itimer *new_timer) +{ + return -EOPNOTSUPP; +} + /* * Return nonzero if we know a priori this clockid_t value is bogus. */ @@ -248,6 +253,7 @@ static __init int init_posix_timers(void) .clock_getres = hrtimer_get_res, .clock_get = posix_get_monotonic_raw, .clock_set = do_posix_clock_nosettime, + .timer_create = no_timer_create, }; register_posix_clock(CLOCK_REALTIME, &clock_realtime); -- cgit v1.2.3 From e368d3a836797ddf193b1ec18c97407a791d2451 Mon Sep 17 00:00:00 2001 From: Sharyathi Nagesh Date: Tue, 23 Dec 2008 13:57:12 -0800 Subject: cgroups: suppress bogus warning messages Remove spurious warning messages that are thrown onto the console during cgroup operations. Signed-off-by: Alexey Dobriyan Signed-off-by: Sharyathi Nagesh Acked-by: Serge E. Hallyn Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 8185a0f09594..a3415507bd0a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2934,9 +2934,6 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, again: root = subsys->root; if (root == &rootnode) { - printk(KERN_INFO - "Not cloning cgroup for unused subsystem %s\n", - subsys->name); mutex_unlock(&cgroup_mutex); return 0; } -- cgit v1.2.3 From 20ca9b3f4c6dfa0af8dd5b18a64df17eb994b54d Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 23 Dec 2008 13:57:14 -0800 Subject: cgroups: avoid accessing uninitialized data in failure path If cgroup_get_rootdir() failed, free_cg_links() will be called in the failure path, but tmp_cg_links hasn't been initialized at that time. I introduced this bug in the 2.6.27 merge window. Signed-off-by: Li Zefan Acked-by: Serge Hallyn Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a3415507bd0a..2606d0fb4e54 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1024,7 +1024,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type, if (ret == -EBUSY) { mutex_unlock(&cgroup_mutex); mutex_unlock(&inode->i_mutex); - goto drop_new_super; + goto free_cg_links; } /* EBUSY should be the only error here */ @@ -1073,10 +1073,11 @@ static int cgroup_get_sb(struct file_system_type *fs_type, return simple_set_mnt(mnt, sb); + free_cg_links: + free_cg_links(&tmp_cg_links); drop_new_super: up_write(&sb->s_umount); deactivate_super(sb); - free_cg_links(&tmp_cg_links); return ret; } -- cgit v1.2.3