summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Jackson <pj@sgi.com>2006-01-08 01:02:00 -0800
committerLinus Torvalds <torvalds@g5.osdl.org>2006-01-08 20:13:44 -0800
commit04c19fa6f16047abff2288ddbc1f0798ede5a849 (patch)
tree4c4f109d919042b300ac907a8fde64b822faa7aa
parent4225399a66b315d4d1fb1cb61b75dda201c832e3 (diff)
downloadlwn-04c19fa6f16047abff2288ddbc1f0798ede5a849.tar.gz
lwn-04c19fa6f16047abff2288ddbc1f0798ede5a849.zip
[PATCH] cpuset: migrate all tasks in cpuset at once
Given the mechanism in the previous patch to handle rebinding the per-vma mempolicies of all tasks in a cpuset that changes its memory placement, it is now easier to handle the page migration requirements of such tasks at the same time. The previous code didn't actually attempt to migrate the pages of the tasks in a cpuset whose memory placement changed until the next time each such task tried to allocate memory. This was undesirable, as users invoking memory page migration expected it to happen when the placement changed, not some unspecified time later when the task needed more memory. It is now trivial to handle the page migration at the same time as the per-vma rebinding is done. The routine cpuset.c:update_nodemask(), which handles changing a cpuset's memory placement ('mems'), now checks for the special case of being asked to write a placement that is the same as before. It was harmless enough before to just recompute everything again, even though nothing had changed. But page migration is a heavyweight operation - moving pages about. So now it is worth avoiding that if asked to move a cpuset to its current location. Signed-off-by: Paul Jackson <pj@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--kernel/cpuset.c29
1 files changed, 16 insertions, 13 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 19f87565be17..cf8203a5fa71 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -639,25 +639,14 @@ void cpuset_update_task_memory_state()
task_unlock(tsk);
if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
- nodemask_t oldmem = tsk->mems_allowed;
- int migrate;
-
down(&callback_sem);
task_lock(tsk);
cs = tsk->cpuset; /* Maybe changed when task not locked */
- migrate = is_memory_migrate(cs);
guarantee_online_mems(cs, &tsk->mems_allowed);
tsk->cpuset_mems_generation = cs->mems_generation;
task_unlock(tsk);
up(&callback_sem);
mpol_rebind_task(tsk, &tsk->mems_allowed);
- if (!nodes_equal(oldmem, tsk->mems_allowed)) {
- if (migrate) {
- do_migrate_pages(tsk->mm, &oldmem,
- &tsk->mems_allowed,
- MPOL_MF_MOVE_ALL);
- }
- }
}
}
@@ -815,7 +804,9 @@ static int update_cpumask(struct cpuset *cs, char *buf)
* Handle user request to change the 'mems' memory placement
* of a cpuset. Needs to validate the request, update the
* cpusets mems_allowed and mems_generation, and for each
- * task in the cpuset, rebind any vma mempolicies.
+ * task in the cpuset, rebind any vma mempolicies and if
+ * the cpuset is marked 'memory_migrate', migrate the tasks
+ * pages to the new memory.
*
* Call with manage_sem held. May take callback_sem during call.
* Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
@@ -826,9 +817,11 @@ static int update_cpumask(struct cpuset *cs, char *buf)
static int update_nodemask(struct cpuset *cs, char *buf)
{
struct cpuset trialcs;
+ nodemask_t oldmem;
struct task_struct *g, *p;
struct mm_struct **mmarray;
int i, n, ntasks;
+ int migrate;
int fudge;
int retval;
@@ -837,6 +830,11 @@ static int update_nodemask(struct cpuset *cs, char *buf)
if (retval < 0)
goto done;
nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, node_online_map);
+ oldmem = cs->mems_allowed;
+ if (nodes_equal(oldmem, trialcs.mems_allowed)) {
+ retval = 0; /* Too easy - nothing to do */
+ goto done;
+ }
if (nodes_empty(trialcs.mems_allowed)) {
retval = -ENOSPC;
goto done;
@@ -908,12 +906,17 @@ static int update_nodemask(struct cpuset *cs, char *buf)
* cpuset manage_sem, we know that no other rebind effort will
* be contending for the global variable cpuset_being_rebound.
* It's ok if we rebind the same mm twice; mpol_rebind_mm()
- * is idempotent.
+ * is idempotent. Also migrate pages in each mm to new nodes.
*/
+ migrate = is_memory_migrate(cs);
for (i = 0; i < n; i++) {
struct mm_struct *mm = mmarray[i];
mpol_rebind_mm(mm, &cs->mems_allowed);
+ if (migrate) {
+ do_migrate_pages(mm, &oldmem, &cs->mems_allowed,
+ MPOL_MF_MOVE_ALL);
+ }
mmput(mm);
}