diff options
author | Johannes Weiner <hannes@cmpxchg.org> | 2011-03-23 16:42:30 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-23 19:46:28 -0700 |
commit | 6b3ae58efca06623c197fd6d91ded4aa3a8fe039 (patch) | |
tree | 6460e4e1ce206d391b862a3d398a9e22e33ecb3c | |
parent | 5564e88ba6fd2f6dcd83a592771810cd84b5ae80 (diff) | |
download | lwn-6b3ae58efca06623c197fd6d91ded4aa3a8fe039.tar.gz lwn-6b3ae58efca06623c197fd6d91ded4aa3a8fe039.zip |
memcg: remove direct page_cgroup-to-page pointer
In struct page_cgroup, we have a full word for flags but only a few are
reserved. Use the remaining upper bits to encode, depending on
configuration, the node or the section, to enable page_cgroup-to-page
lookups without a direct pointer.
This saves a full word for every page in a system with memory cgroups
enabled.
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/page_cgroup.h | 75 | ||||
-rw-r--r-- | kernel/bounds.c | 2 | ||||
-rw-r--r-- | mm/memcontrol.c | 4 | ||||
-rw-r--r-- | mm/page_cgroup.c | 91 |
4 files changed, 117 insertions, 55 deletions
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 6b63679ce8a1..f5de21de31dd 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -1,8 +1,26 @@ #ifndef __LINUX_PAGE_CGROUP_H #define __LINUX_PAGE_CGROUP_H +enum { + /* flags for mem_cgroup */ + PCG_LOCK, /* Lock for pc->mem_cgroup and following bits. */ + PCG_CACHE, /* charged as cache */ + PCG_USED, /* this object is in use. */ + PCG_MIGRATION, /* under page migration */ + /* flags for mem_cgroup and file and I/O status */ + PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */ + PCG_FILE_MAPPED, /* page is accounted as "mapped" */ + /* No lock in page_cgroup */ + PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */ + __NR_PCG_FLAGS, +}; + +#ifndef __GENERATING_BOUNDS_H +#include <generated/bounds.h> + #ifdef CONFIG_CGROUP_MEM_RES_CTLR #include <linux/bit_spinlock.h> + /* * Page Cgroup can be considered as an extended mem_map. * A page_cgroup page is associated with every page descriptor. The @@ -13,7 +31,6 @@ struct page_cgroup { unsigned long flags; struct mem_cgroup *mem_cgroup; - struct page *page; struct list_head lru; /* per cgroup LRU list */ }; @@ -32,19 +49,7 @@ static inline void __init page_cgroup_init(void) #endif struct page_cgroup *lookup_page_cgroup(struct page *page); - -enum { - /* flags for mem_cgroup */ - PCG_LOCK, /* Lock for pc->mem_cgroup and following bits. */ - PCG_CACHE, /* charged as cache */ - PCG_USED, /* this object is in use. */ - PCG_MIGRATION, /* under page migration */ - /* flags for mem_cgroup and file and I/O status */ - PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */ - PCG_FILE_MAPPED, /* page is accounted as "mapped" */ - /* No lock in page_cgroup */ - PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */ -}; +struct page *lookup_cgroup_page(struct page_cgroup *pc); #define TESTPCGFLAG(uname, lname) \ static inline int PageCgroup##uname(struct page_cgroup *pc) \ @@ -117,6 +122,39 @@ static inline void move_unlock_page_cgroup(struct page_cgroup *pc, local_irq_restore(*flags); } +#ifdef CONFIG_SPARSEMEM +#define PCG_ARRAYID_WIDTH SECTIONS_SHIFT +#else +#define PCG_ARRAYID_WIDTH NODES_SHIFT +#endif + +#if (PCG_ARRAYID_WIDTH > BITS_PER_LONG - NR_PCG_FLAGS) +#error Not enough space left in pc->flags to store page_cgroup array IDs +#endif + +/* pc->flags: ARRAY-ID | FLAGS */ + +#define PCG_ARRAYID_MASK ((1UL << PCG_ARRAYID_WIDTH) - 1) + +#define PCG_ARRAYID_OFFSET (BITS_PER_LONG - PCG_ARRAYID_WIDTH) +/* + * Zero the shift count for non-existant fields, to prevent compiler + * warnings and ensure references are optimized away. + */ +#define PCG_ARRAYID_SHIFT (PCG_ARRAYID_OFFSET * (PCG_ARRAYID_WIDTH != 0)) + +static inline void set_page_cgroup_array_id(struct page_cgroup *pc, + unsigned long id) +{ + pc->flags &= ~(PCG_ARRAYID_MASK << PCG_ARRAYID_SHIFT); + pc->flags |= (id & PCG_ARRAYID_MASK) << PCG_ARRAYID_SHIFT; +} + +static inline unsigned long page_cgroup_array_id(struct page_cgroup *pc) +{ + return (pc->flags >> PCG_ARRAYID_SHIFT) & PCG_ARRAYID_MASK; +} + #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct page_cgroup; @@ -137,7 +175,7 @@ static inline void __init page_cgroup_init_flatmem(void) { } -#endif +#endif /* CONFIG_CGROUP_MEM_RES_CTLR */ #include <linux/swap.h> @@ -173,5 +211,8 @@ static inline void swap_cgroup_swapoff(int type) return; } -#endif -#endif +#endif /* CONFIG_CGROUP_MEM_RES_CTLR_SWAP */ + +#endif /* !__GENERATING_BOUNDS_H */ + +#endif /* __LINUX_PAGE_CGROUP_H */ diff --git a/kernel/bounds.c b/kernel/bounds.c index 98a51f26c136..0c9b862292b2 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -9,11 +9,13 @@ #include <linux/page-flags.h> #include <linux/mmzone.h> #include <linux/kbuild.h> +#include <linux/page_cgroup.h> void foo(void) { /* The enum constants to put into include/generated/bounds.h */ DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES); + DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS); /* End of constants */ } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e286e1603e4f..660dfc27d971 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1080,7 +1080,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, if (unlikely(!PageCgroupUsed(pc))) continue; - page = pc->page; + page = lookup_cgroup_page(pc); if (unlikely(!PageLRU(page))) continue; @@ -3344,7 +3344,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem, } spin_unlock_irqrestore(&zone->lru_lock, flags); - page = pc->page; + page = lookup_cgroup_page(pc); ret = mem_cgroup_move_parent(page, pc, mem, GFP_KERNEL); if (ret == -ENOMEM) diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 59a3cd4c799d..6c3f7a6a481a 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -11,12 +11,11 @@ #include <linux/swapops.h> #include <linux/kmemleak.h> -static void __meminit -__init_page_cgroup(struct page_cgroup *pc, unsigned long pfn) +static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id) { pc->flags = 0; + set_page_cgroup_array_id(pc, id); pc->mem_cgroup = NULL; - pc->page = pfn_to_page(pfn); INIT_LIST_HEAD(&pc->lru); } static unsigned long total_usage; @@ -43,6 +42,19 @@ struct page_cgroup *lookup_page_cgroup(struct page *page) return base + offset; } +struct page *lookup_cgroup_page(struct page_cgroup *pc) +{ + unsigned long pfn; + struct page *page; + pg_data_t *pgdat; + + pgdat = NODE_DATA(page_cgroup_array_id(pc)); + pfn = pc - pgdat->node_page_cgroup + pgdat->node_start_pfn; + page = pfn_to_page(pfn); + VM_BUG_ON(pc != lookup_page_cgroup(page)); + return page; +} + static int __init alloc_node_page_cgroup(int nid) { struct page_cgroup *base, *pc; @@ -63,7 +75,7 @@ static int __init alloc_node_page_cgroup(int nid) return -ENOMEM; for (index = 0; index < nr_pages; index++) { pc = base + index; - __init_page_cgroup(pc, start_pfn + index); + init_page_cgroup(pc, nid); } NODE_DATA(nid)->node_page_cgroup = base; total_usage += table_size; @@ -105,46 +117,53 @@ struct page_cgroup *lookup_page_cgroup(struct page *page) return section->page_cgroup + pfn; } +struct page *lookup_cgroup_page(struct page_cgroup *pc) +{ + struct mem_section *section; + struct page *page; + unsigned long nr; + + nr = page_cgroup_array_id(pc); + section = __nr_to_section(nr); + page = pfn_to_page(pc - section->page_cgroup); + VM_BUG_ON(pc != lookup_page_cgroup(page)); + return page; +} + /* __alloc_bootmem...() is protected by !slab_available() */ static int __init_refok init_section_page_cgroup(unsigned long pfn) { - struct mem_section *section = __pfn_to_section(pfn); struct page_cgroup *base, *pc; + struct mem_section *section; unsigned long table_size; + unsigned long nr; int nid, index; - if (!section->page_cgroup) { - nid = page_to_nid(pfn_to_page(pfn)); - table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; - VM_BUG_ON(!slab_is_available()); - if (node_state(nid, N_HIGH_MEMORY)) { - base = kmalloc_node(table_size, - GFP_KERNEL | __GFP_NOWARN, nid); - if (!base) - base = vmalloc_node(table_size, nid); - } else { - base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN); - if (!base) - base = vmalloc(table_size); - } - /* - * The value stored in section->page_cgroup is (base - pfn) - * and it does not point to the memory block allocated above, - * causing kmemleak false positives. - */ - kmemleak_not_leak(base); + nr = pfn_to_section_nr(pfn); + section = __nr_to_section(nr); + + if (section->page_cgroup) + return 0; + + nid = page_to_nid(pfn_to_page(pfn)); + table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; + VM_BUG_ON(!slab_is_available()); + if (node_state(nid, N_HIGH_MEMORY)) { + base = kmalloc_node(table_size, + GFP_KERNEL | __GFP_NOWARN, nid); + if (!base) + base = vmalloc_node(table_size, nid); } else { - /* - * We don't have to allocate page_cgroup again, but - * address of memmap may be changed. So, we have to initialize - * again. - */ - base = section->page_cgroup + pfn; - table_size = 0; - /* check address of memmap is changed or not. */ - if (base->page == pfn_to_page(pfn)) - return 0; + base = kmalloc(table_size, GFP_KERNEL | __GFP_NOWARN); + if (!base) + base = vmalloc(table_size); } + /* + * The value stored in section->page_cgroup is (base - pfn) + * and it does not point to the memory block allocated above, + * causing kmemleak false positives. + */ + kmemleak_not_leak(base); if (!base) { printk(KERN_ERR "page cgroup allocation failure\n"); @@ -153,7 +172,7 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn) for (index = 0; index < PAGES_PER_SECTION; index++) { pc = base + index; - __init_page_cgroup(pc, pfn + index); + init_page_cgroup(pc, nr); } section->page_cgroup = base - pfn; |