diff options
author | Balbir Singh <balbir@linux.vnet.ibm.com> | 2008-02-07 00:14:02 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-02-07 08:42:19 -0800 |
commit | e1a1cd590e3fcb0d2e230128daf2337ea55387dc (patch) | |
tree | eb660ab340c657a1eb595b2d4d8e8b62783bf6fb /mm | |
parent | bed7161a519a2faef53e1bce1b47595e297c1d14 (diff) | |
download | lwn-e1a1cd590e3fcb0d2e230128daf2337ea55387dc.tar.gz lwn-e1a1cd590e3fcb0d2e230128daf2337ea55387dc.zip |
Memory controller: make charging gfp mask aware
Nick Piggin pointed out that swap cache and page cache addition routines
could be called from non-GFP_KERNEL contexts. This patch makes the
charging routine aware of the gfp context. Charging might fail if the
cgroup is over its limit, in which case a suitable error is returned.
This patch was tested on a PowerPC box. I am still looking for a way to
test the path through which allocations happen in non-GFP_KERNEL
contexts.
[kamezawa.hiroyu@jp.fujitsu.com: problem with ZONE_MOVABLE]
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/filemap.c | 2 | ||||
-rw-r--r-- | mm/memcontrol.c | 24 | ||||
-rw-r--r-- | mm/memory.c | 10 | ||||
-rw-r--r-- | mm/migrate.c | 2 | ||||
-rw-r--r-- | mm/swap_state.c | 2 | ||||
-rw-r--r-- | mm/swapfile.c | 2 | ||||
-rw-r--r-- | mm/vmscan.c | 14 |
7 files changed, 31 insertions, 25 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 8ae171cc2811..63040d5e0ae2 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -464,7 +464,7 @@ int add_to_page_cache(struct page *page, struct address_space *mapping, if (error == 0) { - error = mem_cgroup_cache_charge(page, current->mm); + error = mem_cgroup_cache_charge(page, current->mm, gfp_mask); if (error) goto out; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ff7cac602984..ac8774426fec 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -261,7 +261,8 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, * 0 if the charge was successful * < 0 if the cgroup is over its limit */ -int mem_cgroup_charge(struct page *page, struct mm_struct *mm) +int mem_cgroup_charge(struct page *page, struct mm_struct *mm, + gfp_t gfp_mask) { struct mem_cgroup *mem; struct page_cgroup *pc, *race_pc; @@ -293,7 +294,7 @@ retry: unlock_page_cgroup(page); - pc = kzalloc(sizeof(struct page_cgroup), GFP_KERNEL); + pc = kzalloc(sizeof(struct page_cgroup), gfp_mask); if (pc == NULL) goto err; @@ -320,7 +321,14 @@ retry: * the cgroup limit. */ while (res_counter_charge(&mem->res, PAGE_SIZE)) { - if (try_to_free_mem_cgroup_pages(mem)) + bool is_atomic = gfp_mask & GFP_ATOMIC; + /* + * We cannot reclaim under GFP_ATOMIC, fail the charge + */ + if (is_atomic) + goto noreclaim; + + if (try_to_free_mem_cgroup_pages(mem, gfp_mask)) continue; /* @@ -344,9 +352,10 @@ retry: congestion_wait(WRITE, HZ/10); continue; } - +noreclaim: css_put(&mem->css); - mem_cgroup_out_of_memory(mem, GFP_KERNEL); + if (!is_atomic) + mem_cgroup_out_of_memory(mem, GFP_KERNEL); goto free_pc; } @@ -385,7 +394,8 @@ err: /* * See if the cached pages should be charged at all? 
*/ -int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm) +int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, + gfp_t gfp_mask) { struct mem_cgroup *mem; if (!mm) @@ -393,7 +403,7 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm) mem = rcu_dereference(mm->mem_cgroup); if (mem->control_type == MEM_CGROUP_TYPE_ALL) - return mem_cgroup_charge(page, mm); + return mem_cgroup_charge(page, mm, gfp_mask); else return 0; } diff --git a/mm/memory.c b/mm/memory.c index 0ba224ea6ba4..153a54b2013c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1147,7 +1147,7 @@ static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *pa pte_t *pte; spinlock_t *ptl; - retval = mem_cgroup_charge(page, mm); + retval = mem_cgroup_charge(page, mm, GFP_KERNEL); if (retval) goto out; @@ -1650,7 +1650,7 @@ gotten: cow_user_page(new_page, old_page, address, vma); __SetPageUptodate(new_page); - if (mem_cgroup_charge(new_page, mm)) + if (mem_cgroup_charge(new_page, mm, GFP_KERNEL)) goto oom_free_new; /* @@ -2052,7 +2052,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, count_vm_event(PGMAJFAULT); } - if (mem_cgroup_charge(page, mm)) { + if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { delayacct_clear_flag(DELAYACCT_PF_SWAPIN); ret = VM_FAULT_OOM; goto out; @@ -2139,7 +2139,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, goto oom; __SetPageUptodate(page); - if (mem_cgroup_charge(page, mm)) + if (mem_cgroup_charge(page, mm, GFP_KERNEL)) goto oom_free_page; entry = mk_pte(page, vma->vm_page_prot); @@ -2277,7 +2277,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, } - if (mem_cgroup_charge(page, mm)) { + if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { ret = VM_FAULT_OOM; goto out; } diff --git a/mm/migrate.c b/mm/migrate.c index 417bbda14e5b..763794144697 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -153,7 +153,7 @@ static void 
remove_migration_pte(struct vm_area_struct *vma, return; } - if (mem_cgroup_charge(new, mm)) { + if (mem_cgroup_charge(new, mm, GFP_KERNEL)) { pte_unmap(ptep); return; } diff --git a/mm/swap_state.c b/mm/swap_state.c index 88258869c8e7..581b609e748d 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -78,7 +78,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) error = radix_tree_preload(gfp_mask); if (!error) { - error = mem_cgroup_cache_charge(page, current->mm); + error = mem_cgroup_cache_charge(page, current->mm, gfp_mask); if (error) goto out; diff --git a/mm/swapfile.c b/mm/swapfile.c index fddc4cc4149b..35e00c3d0286 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -510,7 +510,7 @@ unsigned int count_swap_pages(int type, int free) static int unuse_pte(struct vm_area_struct *vma, pte_t *pte, unsigned long addr, swp_entry_t entry, struct page *page) { - if (mem_cgroup_charge(page, vma->vm_mm)) + if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL)) return -ENOMEM; inc_mm_counter(vma->vm_mm, anon_rss); diff --git a/mm/vmscan.c b/mm/vmscan.c index 215f6a726b2f..b7d868cbca09 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1337,16 +1337,11 @@ unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask) #ifdef CONFIG_CGROUP_MEM_CONT -#ifdef CONFIG_HIGHMEM -#define ZONE_USERPAGES ZONE_HIGHMEM -#else -#define ZONE_USERPAGES ZONE_NORMAL -#endif - -unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont) +unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, + gfp_t gfp_mask) { struct scan_control sc = { - .gfp_mask = GFP_KERNEL, + .gfp_mask = gfp_mask, .may_writepage = !laptop_mode, .may_swap = 1, .swap_cluster_max = SWAP_CLUSTER_MAX, @@ -1357,9 +1352,10 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont) }; int node; struct zone **zones; + int target_zone = gfp_zone(GFP_HIGHUSER_MOVABLE); for_each_online_node(node) { - zones = 
NODE_DATA(node)->node_zonelists[ZONE_USERPAGES].zones; + zones = NODE_DATA(node)->node_zonelists[target_zone].zones; if (do_try_to_free_pages(zones, sc.gfp_mask, &sc)) return 1; } |