diff options
author | Paul Jackson <pj@sgi.com> | 2006-01-08 01:00:56 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-01-08 20:12:43 -0800 |
commit | 45b07ef31d1182d2cfde7711327e3afb268bb1ac (patch) | |
tree | 3bf820531f920b43d4ed963643b1f565c82bcaa4 /kernel | |
parent | d0d963281ccb22e6f339bfdd75c6b2e31351929f (diff) | |
download | lwn-45b07ef31d1182d2cfde7711327e3afb268bb1ac.tar.gz lwn-45b07ef31d1182d2cfde7711327e3afb268bb1ac.zip |
[PATCH] cpusets: swap migration interface
Add a boolean "memory_migrate" to each cpuset, represented by a file
containing "0" or "1" in each directory below /dev/cpuset.
It defaults to false (file contains "0"). It can be set true by writing
"1" to the file.
If true, then anytime that a task is attached to the cpuset so marked, the
pages of that task will be moved to that cpuset, preserving, to the extent
practical, the cpuset-relative placement of the pages.
Also anytime that a cpuset so marked has its memory placement changed (by
writing to its "mems" file), the tasks in that cpuset will have their pages
moved to the cpusets new nodes, preserving, to the extent practical, the
cpuset-relative placement of the moved pages.
Signed-off-by: Paul Jackson <pj@sgi.com>
Cc: Christoph Lameter <christoph@lameter.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cpuset.c | 38 |
1 files changed, 36 insertions, 2 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 7430640f9816..f63383e01ec7 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -87,6 +87,7 @@ struct cpuset { typedef enum { CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, + CS_MEMORY_MIGRATE, CS_REMOVED, CS_NOTIFY_ON_RELEASE } cpuset_flagbits_t; @@ -112,6 +113,11 @@ static inline int notify_on_release(const struct cpuset *cs) return !!test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); } +static inline int is_memory_migrate(const struct cpuset *cs) +{ + return !!test_bit(CS_MEMORY_MIGRATE, &cs->flags); +} + /* * Increment this atomic integer everytime any cpuset changes its * mems_allowed value. Users of cpusets can track this generation @@ -602,16 +608,24 @@ static void refresh_mems(void) if (current->cpuset_mems_generation != my_cpusets_mem_gen) { struct cpuset *cs; nodemask_t oldmem = current->mems_allowed; + int migrate; down(&callback_sem); task_lock(current); cs = current->cpuset; + migrate = is_memory_migrate(cs); guarantee_online_mems(cs, ¤t->mems_allowed); current->cpuset_mems_generation = cs->mems_generation; task_unlock(current); up(&callback_sem); - if (!nodes_equal(oldmem, current->mems_allowed)) + if (!nodes_equal(oldmem, current->mems_allowed)) { numa_policy_rebind(&oldmem, ¤t->mems_allowed); + if (migrate) { + do_migrate_pages(current->mm, &oldmem, + ¤t->mems_allowed, + MPOL_MF_MOVE_ALL); + } + } } } @@ -795,7 +809,7 @@ static int update_nodemask(struct cpuset *cs, char *buf) /* * update_flag - read a 0 or a 1 in a file and update associated flag * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, - * CS_NOTIFY_ON_RELEASE) + * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE) * cs: the cpuset to update * buf: the buffer where we read the 0 or 1 * @@ -848,6 +862,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) struct task_struct *tsk; struct cpuset *oldcs; cpumask_t cpus; + nodemask_t from, to; if (sscanf(pidbuf, "%d", &pid) != 1) return -EIO; @@ -893,7 +908,12 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) guarantee_online_cpus(cs, &cpus); set_cpus_allowed(tsk, cpus); + from = oldcs->mems_allowed; + to = cs->mems_allowed; + up(&callback_sem); + if (is_memory_migrate(cs)) + do_migrate_pages(tsk->mm, &from, &to, MPOL_MF_MOVE_ALL); put_task_struct(tsk); if (atomic_dec_and_test(&oldcs->count)) check_for_release(oldcs, ppathbuf); @@ -905,6 +925,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) typedef enum { FILE_ROOT, FILE_DIR, + FILE_MEMORY_MIGRATE, FILE_CPULIST, FILE_MEMLIST, FILE_CPU_EXCLUSIVE, @@ -960,6 +981,9 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us case FILE_NOTIFY_ON_RELEASE: retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer); break; + case FILE_MEMORY_MIGRATE: + retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer); + break; case FILE_TASKLIST: retval = attach_task(cs, buffer, &pathbuf); break; @@ -1060,6 +1084,9 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf, case FILE_NOTIFY_ON_RELEASE: *s++ = notify_on_release(cs) ? '1' : '0'; break; + case FILE_MEMORY_MIGRATE: + *s++ = is_memory_migrate(cs) ? '1' : '0'; + break; default: retval = -EINVAL; goto out; @@ -1408,6 +1435,11 @@ static struct cftype cft_notify_on_release = { .private = FILE_NOTIFY_ON_RELEASE, }; +static struct cftype cft_memory_migrate = { + .name = "memory_migrate", + .private = FILE_MEMORY_MIGRATE, +}; + static int cpuset_populate_dir(struct dentry *cs_dentry) { int err; @@ -1422,6 +1454,8 @@ static int cpuset_populate_dir(struct dentry *cs_dentry) return err; if ((err = cpuset_add_file(cs_dentry, &cft_notify_on_release)) < 0) return err; + if ((err = cpuset_add_file(cs_dentry, &cft_memory_migrate)) < 0) + return err; if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0) return err; return 0; |