diff options
Diffstat (limited to 'mm/memcontrol.c')
| -rw-r--r-- | mm/memcontrol.c | 246 |
1 files changed, 153 insertions, 93 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 772bac21d155..051b82ebf371 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -34,7 +34,7 @@ #include <linux/shmem_fs.h> #include <linux/hugetlb.h> #include <linux/pagemap.h> -#include <linux/pagevec.h> +#include <linux/folio_batch.h> #include <linux/vm_event_item.h> #include <linux/smp.h> #include <linux/page-flags.h> @@ -317,6 +317,7 @@ static const unsigned int memcg_node_stat_items[] = { NR_SHMEM_THPS, NR_FILE_THPS, NR_ANON_THPS, + NR_VMALLOC, NR_KERNEL_STACK_KB, NR_PAGETABLE, NR_SECONDARY_PAGETABLE, @@ -330,6 +331,19 @@ static const unsigned int memcg_node_stat_items[] = { PGDEMOTE_DIRECT, PGDEMOTE_KHUGEPAGED, PGDEMOTE_PROACTIVE, + PGSTEAL_KSWAPD, + PGSTEAL_DIRECT, + PGSTEAL_KHUGEPAGED, + PGSTEAL_PROACTIVE, + PGSTEAL_ANON, + PGSTEAL_FILE, + PGSCAN_KSWAPD, + PGSCAN_DIRECT, + PGSCAN_KHUGEPAGED, + PGSCAN_PROACTIVE, + PGSCAN_ANON, + PGSCAN_FILE, + PGREFILL, #ifdef CONFIG_HUGETLB_PAGE NR_HUGETLB, #endif @@ -339,10 +353,10 @@ static const unsigned int memcg_stat_items[] = { MEMCG_SWAP, MEMCG_SOCK, MEMCG_PERCPU_B, - MEMCG_VMALLOC, MEMCG_KMEM, MEMCG_ZSWAP_B, MEMCG_ZSWAPPED, + MEMCG_ZSWAP_INCOMP, }; #define NR_MEMCG_NODE_STAT_ITEMS ARRAY_SIZE(memcg_node_stat_items) @@ -443,17 +457,8 @@ static const unsigned int memcg_vm_event_stat[] = { #endif PSWPIN, PSWPOUT, - PGSCAN_KSWAPD, - PGSCAN_DIRECT, - PGSCAN_KHUGEPAGED, - PGSCAN_PROACTIVE, - PGSTEAL_KSWAPD, - PGSTEAL_DIRECT, - PGSTEAL_KHUGEPAGED, - PGSTEAL_PROACTIVE, PGFAULT, PGMAJFAULT, - PGREFILL, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE, @@ -1359,11 +1364,12 @@ static const struct memory_stat memory_stats[] = { { "sec_pagetables", NR_SECONDARY_PAGETABLE }, { "percpu", MEMCG_PERCPU_B }, { "sock", MEMCG_SOCK }, - { "vmalloc", MEMCG_VMALLOC }, + { "vmalloc", NR_VMALLOC }, { "shmem", NR_SHMEM }, #ifdef CONFIG_ZSWAP { "zswap", MEMCG_ZSWAP_B }, { "zswapped", MEMCG_ZSWAPPED }, + { "zswap_incomp", MEMCG_ZSWAP_INCOMP }, #endif { "file_mapped", NR_FILE_MAPPED }, { "file_dirty", 
NR_FILE_DIRTY }, @@ -1400,6 +1406,15 @@ static const struct memory_stat memory_stats[] = { { "pgdemote_direct", PGDEMOTE_DIRECT }, { "pgdemote_khugepaged", PGDEMOTE_KHUGEPAGED }, { "pgdemote_proactive", PGDEMOTE_PROACTIVE }, + { "pgsteal_kswapd", PGSTEAL_KSWAPD }, + { "pgsteal_direct", PGSTEAL_DIRECT }, + { "pgsteal_khugepaged", PGSTEAL_KHUGEPAGED }, + { "pgsteal_proactive", PGSTEAL_PROACTIVE }, + { "pgscan_kswapd", PGSCAN_KSWAPD }, + { "pgscan_direct", PGSCAN_DIRECT }, + { "pgscan_khugepaged", PGSCAN_KHUGEPAGED }, + { "pgscan_proactive", PGSCAN_PROACTIVE }, + { "pgrefill", PGREFILL }, #ifdef CONFIG_NUMA_BALANCING { "pgpromote_success", PGPROMOTE_SUCCESS }, #endif @@ -1443,6 +1458,15 @@ static int memcg_page_state_output_unit(int item) case PGDEMOTE_DIRECT: case PGDEMOTE_KHUGEPAGED: case PGDEMOTE_PROACTIVE: + case PGSTEAL_KSWAPD: + case PGSTEAL_DIRECT: + case PGSTEAL_KHUGEPAGED: + case PGSTEAL_PROACTIVE: + case PGSCAN_KSWAPD: + case PGSCAN_DIRECT: + case PGSCAN_KHUGEPAGED: + case PGSCAN_PROACTIVE: + case PGREFILL: #ifdef CONFIG_NUMA_BALANCING case PGPROMOTE_SUCCESS: #endif @@ -1514,15 +1538,15 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) /* Accumulated memory events */ seq_buf_printf(s, "pgscan %lu\n", - memcg_events(memcg, PGSCAN_KSWAPD) + - memcg_events(memcg, PGSCAN_DIRECT) + - memcg_events(memcg, PGSCAN_PROACTIVE) + - memcg_events(memcg, PGSCAN_KHUGEPAGED)); + memcg_page_state(memcg, PGSCAN_KSWAPD) + + memcg_page_state(memcg, PGSCAN_DIRECT) + + memcg_page_state(memcg, PGSCAN_PROACTIVE) + + memcg_page_state(memcg, PGSCAN_KHUGEPAGED)); seq_buf_printf(s, "pgsteal %lu\n", - memcg_events(memcg, PGSTEAL_KSWAPD) + - memcg_events(memcg, PGSTEAL_DIRECT) + - memcg_events(memcg, PGSTEAL_PROACTIVE) + - memcg_events(memcg, PGSTEAL_KHUGEPAGED)); + memcg_page_state(memcg, PGSTEAL_KSWAPD) + + memcg_page_state(memcg, PGSTEAL_DIRECT) + + memcg_page_state(memcg, PGSTEAL_PROACTIVE) + + memcg_page_state(memcg, PGSTEAL_KHUGEPAGED)); for (i = 0; i < 
ARRAY_SIZE(memcg_vm_event_stat); i++) { #ifdef CONFIG_MEMCG_V1 @@ -2361,7 +2385,7 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask, struct page_counter *counter; unsigned long nr_reclaimed; bool passed_oom = false; - unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP; + unsigned int reclaim_options; bool drained = false; bool raised_max_event = false; unsigned long pflags; @@ -2375,6 +2399,7 @@ retry: /* Avoid the refill and flush of the older stock */ batch = nr_pages; + reclaim_options = MEMCG_RECLAIM_MAY_SWAP; if (!do_memsw_account() || page_counter_try_charge(&memcg->memsw, batch, &counter)) { if (page_counter_try_charge(&memcg->memory, batch, &counter)) @@ -2926,12 +2951,30 @@ void __memcg_kmem_uncharge_page(struct page *page, int order) obj_cgroup_put(objcg); } +static struct obj_stock_pcp *trylock_stock(void) +{ + if (local_trylock(&obj_stock.lock)) + return this_cpu_ptr(&obj_stock); + + return NULL; +} + +static void unlock_stock(struct obj_stock_pcp *stock) +{ + if (stock) + local_unlock(&obj_stock.lock); +} + +/* Call after __refill_obj_stock() to ensure stock->cached_objcg == objcg */ static void __account_obj_stock(struct obj_cgroup *objcg, struct obj_stock_pcp *stock, int nr, struct pglist_data *pgdat, enum node_stat_item idx) { int *bytes; + if (!stock || READ_ONCE(stock->cached_objcg) != objcg) + goto direct; + /* * Save vmstat data in stock and skip vmstat array update unless * accumulating over a page of vmstat data or when pgdat changes. 
@@ -2971,29 +3014,35 @@ static void __account_obj_stock(struct obj_cgroup *objcg, nr = 0; } } +direct: if (nr) mod_objcg_mlstate(objcg, pgdat, idx, nr); } -static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes, - struct pglist_data *pgdat, enum node_stat_item idx) +static bool __consume_obj_stock(struct obj_cgroup *objcg, + struct obj_stock_pcp *stock, + unsigned int nr_bytes) +{ + if (objcg == READ_ONCE(stock->cached_objcg) && + stock->nr_bytes >= nr_bytes) { + stock->nr_bytes -= nr_bytes; + return true; + } + + return false; +} + +static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) { struct obj_stock_pcp *stock; bool ret = false; - if (!local_trylock(&obj_stock.lock)) + stock = trylock_stock(); + if (!stock) return ret; - stock = this_cpu_ptr(&obj_stock); - if (objcg == READ_ONCE(stock->cached_objcg) && stock->nr_bytes >= nr_bytes) { - stock->nr_bytes -= nr_bytes; - ret = true; - - if (pgdat) - __account_obj_stock(objcg, stock, nr_bytes, pgdat, idx); - } - - local_unlock(&obj_stock.lock); + ret = __consume_obj_stock(objcg, stock, nr_bytes); + unlock_stock(stock); return ret; } @@ -3077,23 +3126,20 @@ static bool obj_stock_flush_required(struct obj_stock_pcp *stock, return flush; } -static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes, - bool allow_uncharge, int nr_acct, struct pglist_data *pgdat, - enum node_stat_item idx) +static void __refill_obj_stock(struct obj_cgroup *objcg, + struct obj_stock_pcp *stock, + unsigned int nr_bytes, + bool allow_uncharge) { - struct obj_stock_pcp *stock; unsigned int nr_pages = 0; - if (!local_trylock(&obj_stock.lock)) { - if (pgdat) - mod_objcg_mlstate(objcg, pgdat, idx, nr_acct); + if (!stock) { nr_pages = nr_bytes >> PAGE_SHIFT; nr_bytes = nr_bytes & (PAGE_SIZE - 1); atomic_add(nr_bytes, &objcg->nr_charged_bytes); goto out; } - stock = this_cpu_ptr(&obj_stock); if (READ_ONCE(stock->cached_objcg) != objcg) { /* reset if necessary */ 
drain_obj_stock(stock); obj_cgroup_get(objcg); @@ -3105,27 +3151,45 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes, } stock->nr_bytes += nr_bytes; - if (pgdat) - __account_obj_stock(objcg, stock, nr_acct, pgdat, idx); - if (allow_uncharge && (stock->nr_bytes > PAGE_SIZE)) { nr_pages = stock->nr_bytes >> PAGE_SHIFT; stock->nr_bytes &= (PAGE_SIZE - 1); } - local_unlock(&obj_stock.lock); out: if (nr_pages) obj_cgroup_uncharge_pages(objcg, nr_pages); } -static int obj_cgroup_charge_account(struct obj_cgroup *objcg, gfp_t gfp, size_t size, - struct pglist_data *pgdat, enum node_stat_item idx) +static void refill_obj_stock(struct obj_cgroup *objcg, + unsigned int nr_bytes, + bool allow_uncharge) +{ + struct obj_stock_pcp *stock = trylock_stock(); + __refill_obj_stock(objcg, stock, nr_bytes, allow_uncharge); + unlock_stock(stock); +} + +static int __obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, + size_t size, size_t *remainder) +{ + size_t charge_size; + int ret; + + charge_size = PAGE_ALIGN(size); + ret = obj_cgroup_charge_pages(objcg, gfp, charge_size >> PAGE_SHIFT); + if (!ret) + *remainder = charge_size - size; + + return ret; +} + +int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size) { - unsigned int nr_pages, nr_bytes; + size_t remainder; int ret; - if (likely(consume_obj_stock(objcg, size, pgdat, idx))) + if (likely(consume_obj_stock(objcg, size))) return 0; /* @@ -3151,28 +3215,16 @@ static int obj_cgroup_charge_account(struct obj_cgroup *objcg, gfp_t gfp, size_t * bytes is (sizeof(object) + PAGE_SIZE - 2) if there is no data * race. */ - nr_pages = size >> PAGE_SHIFT; - nr_bytes = size & (PAGE_SIZE - 1); - - if (nr_bytes) - nr_pages += 1; - - ret = obj_cgroup_charge_pages(objcg, gfp, nr_pages); - if (!ret && (nr_bytes || pgdat)) - refill_obj_stock(objcg, nr_bytes ? 
PAGE_SIZE - nr_bytes : 0, - false, size, pgdat, idx); + ret = __obj_cgroup_charge(objcg, gfp, size, &remainder); + if (!ret && remainder) + refill_obj_stock(objcg, remainder, false); return ret; } -int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size) -{ - return obj_cgroup_charge_account(objcg, gfp, size, NULL, 0); -} - void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size) { - refill_obj_stock(objcg, size, true, 0, NULL, 0); + refill_obj_stock(objcg, size, true); } static inline size_t obj_full_size(struct kmem_cache *s) @@ -3187,6 +3239,7 @@ static inline size_t obj_full_size(struct kmem_cache *s) bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru, gfp_t flags, size_t size, void **p) { + size_t obj_size = obj_full_size(s); struct obj_cgroup *objcg; struct slab *slab; unsigned long off; @@ -3227,6 +3280,7 @@ bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru, for (i = 0; i < size; i++) { unsigned long obj_exts; struct slabobj_ext *obj_ext; + struct obj_stock_pcp *stock; slab = virt_to_slab(p[i]); @@ -3246,9 +3300,20 @@ bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru, * TODO: we could batch this until slab_pgdat(slab) changes * between iterations, with a more complicated undo */ - if (obj_cgroup_charge_account(objcg, flags, obj_full_size(s), - slab_pgdat(slab), cache_vmstat_idx(s))) - return false; + stock = trylock_stock(); + if (!stock || !__consume_obj_stock(objcg, stock, obj_size)) { + size_t remainder; + + unlock_stock(stock); + if (__obj_cgroup_charge(objcg, flags, obj_size, &remainder)) + return false; + stock = trylock_stock(); + if (remainder) + __refill_obj_stock(objcg, stock, remainder, false); + } + __account_obj_stock(objcg, stock, obj_size, + slab_pgdat(slab), cache_vmstat_idx(s)); + unlock_stock(stock); obj_exts = slab_obj_exts(slab); get_slab_obj_exts(obj_exts); @@ -3270,6 +3335,7 @@ void __memcg_slab_free_hook(struct kmem_cache *s, struct 
slab *slab, for (int i = 0; i < objects; i++) { struct obj_cgroup *objcg; struct slabobj_ext *obj_ext; + struct obj_stock_pcp *stock; unsigned int off; off = obj_to_index(s, slab, p[i]); @@ -3279,8 +3345,13 @@ void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, continue; obj_ext->objcg = NULL; - refill_obj_stock(objcg, obj_size, true, -obj_size, - slab_pgdat(slab), cache_vmstat_idx(s)); + + stock = trylock_stock(); + __refill_obj_stock(objcg, stock, obj_size, true); + __account_obj_stock(objcg, stock, -obj_size, + slab_pgdat(slab), cache_vmstat_idx(s)); + unlock_stock(stock); + obj_cgroup_put(objcg); } } @@ -3612,13 +3683,7 @@ static void mem_cgroup_private_id_remove(struct mem_cgroup *memcg) } } -void __maybe_unused mem_cgroup_private_id_get_many(struct mem_cgroup *memcg, - unsigned int n) -{ - refcount_add(n, &memcg->id.ref); -} - -static void mem_cgroup_private_id_put_many(struct mem_cgroup *memcg, unsigned int n) +static inline void mem_cgroup_private_id_put(struct mem_cgroup *memcg, unsigned int n) { if (refcount_sub_and_test(n, &memcg->id.ref)) { mem_cgroup_private_id_remove(memcg); @@ -3628,14 +3693,9 @@ static void mem_cgroup_private_id_put_many(struct mem_cgroup *memcg, unsigned in } } -static inline void mem_cgroup_private_id_put(struct mem_cgroup *memcg) +struct mem_cgroup *mem_cgroup_private_id_get_online(struct mem_cgroup *memcg, unsigned int n) { - mem_cgroup_private_id_put_many(memcg, 1); -} - -struct mem_cgroup *mem_cgroup_private_id_get_online(struct mem_cgroup *memcg) -{ - while (!refcount_inc_not_zero(&memcg->id.ref)) { + while (!refcount_add_not_zero(n, &memcg->id.ref)) { /* * The root cgroup cannot be destroyed, so it's refcount must * always be >= 1. 
@@ -3935,7 +3995,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) drain_all_stock(memcg); - mem_cgroup_private_id_put(memcg); + mem_cgroup_private_id_put(memcg, 1); } static void mem_cgroup_css_released(struct cgroup_subsys_state *css) @@ -5225,19 +5285,15 @@ int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry) return 0; } - memcg = mem_cgroup_private_id_get_online(memcg); + memcg = mem_cgroup_private_id_get_online(memcg, nr_pages); if (!mem_cgroup_is_root(memcg) && !page_counter_try_charge(&memcg->swap, nr_pages, &counter)) { memcg_memory_event(memcg, MEMCG_SWAP_MAX); memcg_memory_event(memcg, MEMCG_SWAP_FAIL); - mem_cgroup_private_id_put(memcg); + mem_cgroup_private_id_put(memcg, nr_pages); return -ENOMEM; } - - /* Get references for the tail pages, too */ - if (nr_pages > 1) - mem_cgroup_private_id_get_many(memcg, nr_pages - 1); mod_memcg_state(memcg, MEMCG_SWAP, nr_pages); swap_cgroup_record(folio, mem_cgroup_private_id(memcg), entry); @@ -5266,7 +5322,7 @@ void __mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages) page_counter_uncharge(&memcg->swap, nr_pages); } mod_memcg_state(memcg, MEMCG_SWAP, -nr_pages); - mem_cgroup_private_id_put_many(memcg, nr_pages); + mem_cgroup_private_id_put(memcg, nr_pages); } rcu_read_unlock(); } @@ -5513,6 +5569,8 @@ void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size) memcg = obj_cgroup_memcg(objcg); mod_memcg_state(memcg, MEMCG_ZSWAP_B, size); mod_memcg_state(memcg, MEMCG_ZSWAPPED, 1); + if (size == PAGE_SIZE) + mod_memcg_state(memcg, MEMCG_ZSWAP_INCOMP, 1); rcu_read_unlock(); } @@ -5536,6 +5594,8 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size) memcg = obj_cgroup_memcg(objcg); mod_memcg_state(memcg, MEMCG_ZSWAP_B, -size); mod_memcg_state(memcg, MEMCG_ZSWAPPED, -1); + if (size == PAGE_SIZE) + mod_memcg_state(memcg, MEMCG_ZSWAP_INCOMP, -1); rcu_read_unlock(); } |
