From d8ac897137a230ec351269f6378017f2decca512 Mon Sep 17 00:00:00 2001
From: Matt Fleming
Date: Wed, 21 Sep 2016 14:38:10 +0100
Subject: sched/core: Add wrappers for lockdep_(un)pin_lock()

In preparation for adding diagnostic checks to catch missing calls to
update_rq_clock(), provide wrappers for (re)pinning and unpinning
rq->lock.

Because the pending diagnostic checks allow state to be maintained in
rq_flags across pin contexts, swap the 'struct pin_cookie' arguments
for 'struct rq_flags *'.

Signed-off-by: Matt Fleming
Signed-off-by: Peter Zijlstra (Intel)
Cc: Byungchul Park
Cc: Frederic Weisbecker
Cc: Jan Kara
Cc: Linus Torvalds
Cc: Luca Abeni
Cc: Mel Gorman
Cc: Mike Galbraith
Cc: Peter Zijlstra
Cc: Petr Mladek
Cc: Rik van Riel
Cc: Sergey Senozhatsky
Cc: Thomas Gleixner
Cc: Wanpeng Li
Cc: Yuyang Du
Link: http://lkml.kernel.org/r/20160921133813.31976-5-matt@codeblueprint.co.uk
Signed-off-by: Ingo Molnar
---
 kernel/sched/rt.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel/sched/rt.c')

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 2516b8df6dbb..88254be118b0 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1523,7 +1523,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
 }
 
 static struct task_struct *
-pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
+pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
 	struct task_struct *p;
 	struct rt_rq *rt_rq = &rq->rt;
@@ -1535,9 +1535,9 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct pin_cookie coo
 	 * disabled avoiding further scheduler activity on it and we're
 	 * being very careful to re-start the picking loop.
	 */
-	lockdep_unpin_lock(&rq->lock, cookie);
+	rq_unpin_lock(rq, rf);
 	pull_rt_task(rq);
-	lockdep_repin_lock(&rq->lock, cookie);
+	rq_repin_lock(rq, rf);
 	/*
 	 * pull_rt_task() can drop (and re-acquire) rq->lock; this
 	 * means a dl or stop task can slip in, in which case we need
--
cgit v1.2.3
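[Note: the wrappers themselves live in kernel/sched/sched.h and are therefore
not visible in this rt.c-limited view. Roughly, the commit introduces a
'struct rq_flags' that carries the lockdep pin cookie, plus thin inline
wrappers around the lockdep calls. The following is a sketch paraphrased from
the upstream commit, not a verbatim quote of the source:

  struct rq_flags {
          unsigned long flags;
          struct pin_cookie cookie;
  };

  /* Pin rq->lock and remember the cookie in the caller's rq_flags. */
  static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
  {
          rf->cookie = lockdep_pin_lock(&rq->lock);
  }

  /* Unpin before a section that may drop and re-acquire rq->lock. */
  static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
  {
          lockdep_unpin_lock(&rq->lock, rf->cookie);
  }

  /* Re-pin once rq->lock is held again. */
  static inline void rq_repin_lock(struct rq *rq, struct rq_flags *rf)
  {
          lockdep_repin_lock(&rq->lock, rf->cookie);
  }

Callers such as pick_next_task_rt() above then pass the rq_flags pointer
around instead of a bare pin cookie, which lets later diagnostic patches
stash extra state in rq_flags across an unpin/repin pair.]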
From 619bd4a71874a8fd78eb6ccf9f272c5e98bcc7b7 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Tue, 24 Jan 2017 15:40:06 +0100
Subject: sched/rt: Add a missing rescheduling point

Since the change in commit:

  fd7a4bed1835 ("sched, rt: Convert switched_{from, to}_rt() / prio_changed_rt() to balance callbacks")

... we don't reschedule a task under certain circumstances:

Let's say task-A, SCHED_OTHER, is running on CPU0 (and it may run only
on CPU0) and holds a PI lock. This task is removed from the CPU because
it used up its time slice and another SCHED_OTHER task is running.

Task-B on CPU1 runs at RT priority and asks for the lock owned by
task-A. This results in a priority boost for task-A. Task-B goes to
sleep until the lock has been made available. Task-A is already
runnable (but not active), so it receives no wake up.

The reality now is that task-A only gets on the CPU once the scheduler
decides to remove the current task, despite the fact that a
high-priority task is enqueued and waiting. This may take a long time.

The desired behaviour is that CPU0 immediately reschedules after the
priority boost, which made task-A the task with the highest priority.

Suggested-by: Peter Zijlstra
Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: Peter Zijlstra (Intel)
Cc: Linus Torvalds
Cc: Mike Galbraith
Cc: Thomas Gleixner
Fixes: fd7a4bed1835 ("sched, rt: Convert switched_{from, to}_rt() / prio_changed_rt() to balance callbacks")
Link: http://lkml.kernel.org/r/20170124144006.29821-1-bigeasy@linutronix.de
Signed-off-by: Ingo Molnar
---
 kernel/sched/deadline.c | 3 +--
 kernel/sched/rt.c       | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'kernel/sched/rt.c')

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 491ff663e1b6..27737f34757d 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1729,12 +1729,11 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
 	if (task_on_rq_queued(p) && rq->curr != p) {
 #ifdef CONFIG_SMP
 		if (tsk_nr_cpus_allowed(p) > 1 && rq->dl.overloaded)
 			queue_push_tasks(rq);
-#else
+#endif
 		if (dl_task(rq->curr))
 			check_preempt_curr_dl(rq, p, 0);
 		else
 			resched_curr(rq);
-#endif
 	}
 }
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 88254be118b0..704f2b89abf1 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2198,10 +2198,9 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
 	if (task_on_rq_queued(p) && rq->curr != p) {
 #ifdef CONFIG_SMP
 		if (tsk_nr_cpus_allowed(p) > 1 && rq->rt.overloaded)
 			queue_push_tasks(rq);
-#else
+#endif /* CONFIG_SMP */
 		if (p->prio < rq->curr->prio)
 			resched_curr(rq);
-#endif /* CONFIG_SMP */
 	}
 }
--
cgit v1.2.3
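[Note: the effect of this patch is easiest to see in the resulting function.
After the change, switched_to_rt() reads approximately as follows. This is
reconstructed from the hunk above; the surrounding lines come from the kernel
of that era and may differ in detail:

  static void switched_to_rt(struct rq *rq, struct task_struct *p)
  {
          /*
           * If the task is already running there is nothing to do;
           * otherwise the current task may need to be preempted.
           */
          if (task_on_rq_queued(p) && rq->curr != p) {
  #ifdef CONFIG_SMP
                  if (tsk_nr_cpus_allowed(p) > 1 && rq->rt.overloaded)
                          queue_push_tasks(rq);
  #endif /* CONFIG_SMP */
                  if (p->prio < rq->curr->prio)
                          resched_curr(rq);
          }
  }

The key point is that the priority comparison and resched_curr() call, which
previously sat in the #else branch and was therefore compiled out on SMP
kernels, now runs unconditionally: pushing the task to another CPU and
rescheduling the local CPU are complementary actions, not alternatives. The
deadline.c change is the analogous fix for SCHED_DEADLINE.]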
From 975e155ed8732cb81f55c021c441ae662dd040b5 Mon Sep 17 00:00:00 2001
From: Shile Zhang
Date: Sat, 28 Jan 2017 22:00:49 +0800
Subject: sched/rt: Show the 'sched_rr_timeslice' SCHED_RR timeslice tuning knob in milliseconds

We added the 'sched_rr_timeslice_ms' SCHED_RR tuning knob in this commit:

  ce0dbbbb30ae ("sched/rt: Add a tuning knob to allow changing SCHED_RR timeslice")

... whose name suggests to users that it's in milliseconds, while in
reality it is set in milliseconds but the result is shown in jiffies.

This is obviously confusing when HZ is not 1000: it makes it appear as
if setting the value failed, e.g. with HZ=100:

  root# echo 100 > /proc/sys/kernel/sched_rr_timeslice_ms
  root# cat /proc/sys/kernel/sched_rr_timeslice_ms
  10

Fix this to be milliseconds all around.

Signed-off-by: Shile Zhang
Signed-off-by: Peter Zijlstra (Intel)
Cc: Linus Torvalds
Cc: Mike Galbraith
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/1485612049-20923-1-git-send-email-shile.zhang@nokia.com
Signed-off-by: Ingo Molnar
---
 include/linux/sched/sysctl.h | 1 +
 kernel/sched/core.c          | 5 +++--
 kernel/sched/rt.c            | 1 +
 kernel/sysctl.c              | 2 +-
 4 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'kernel/sched/rt.c')

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 441145351301..49308e142aae 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -59,6 +59,7 @@ extern unsigned int sysctl_sched_cfs_bandwidth_slice;
 extern unsigned int sysctl_sched_autogroup_enabled;
 #endif
 
+extern int sysctl_sched_rr_timeslice;
 extern int sched_rr_timeslice;
 
 extern int sched_rr_handler(struct ctl_table *table, int write,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d01f9d047397..10e18faaa632 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8471,8 +8471,9 @@ int sched_rr_handler(struct ctl_table *table, int write,
 	/* make sure that internally we keep jiffies */
 	/* also, writing zero resets timeslice to default */
 	if (!ret && write) {
-		sched_rr_timeslice = sched_rr_timeslice <= 0 ?
-			RR_TIMESLICE : msecs_to_jiffies(sched_rr_timeslice);
+		sched_rr_timeslice =
+			sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE :
+			msecs_to_jiffies(sysctl_sched_rr_timeslice);
 	}
 	mutex_unlock(&mutex);
 	return ret;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 704f2b89abf1..4101f9d1aa40 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -9,6 +9,7 @@
 #include <linux/irq_work.h>
 
 int sched_rr_timeslice = RR_TIMESLICE;
+int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
 
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 1aea594a54db..bb260ceb3718 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -416,7 +416,7 @@ static struct ctl_table kern_table[] = {
 	},
 	{
 		.procname	= "sched_rr_timeslice_ms",
-		.data		= &sched_rr_timeslice,
+		.data		= &sysctl_sched_rr_timeslice,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= sched_rr_handler,
--
cgit v1.2.3
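[Note: the arithmetic behind the fix, as a stand-alone illustration. This is a
hypothetical user-space sketch, not kernel code; HZ, MSEC_PER_SEC, RR_TIMESLICE
and msecs_to_jiffies() are redefined locally to mimic a HZ=100 kernel:

  #include <stdio.h>

  #define HZ           100
  #define MSEC_PER_SEC 1000
  #define RR_TIMESLICE (100 * HZ / 1000)  /* default slice: 100ms in jiffies */

  static int msecs_to_jiffies(int ms)
  {
          return ms * HZ / MSEC_PER_SEC;
  }

  int main(void)
  {
          int sched_rr_timeslice = msecs_to_jiffies(100); /* user writes 100 */

          /* Old behaviour: the sysctl read back the raw jiffies value. */
          printf("old read-back: %d\n", sched_rr_timeslice);  /* prints 10 */

          /* New behaviour: a separate, ms-denominated variable is exposed. */
          int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
          printf("new read-back: %d\n", sysctl_sched_rr_timeslice); /* 100 */

          return 0;
  }

With HZ=100 a jiffy is 10ms, so the 100ms the user wrote was stored as 10
jiffies and echoed back as "10". Keeping the user-visible value in a separate
ms-denominated variable (sysctl_sched_rr_timeslice) and converting to jiffies
only for the internal sched_rr_timeslice avoids that surprising round trip
through jiffies.]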