Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--  mm/memcontrol.c | 358
1 file changed, 177 insertions(+), 181 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4de6acb9b8ec..421740f1bcdc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -315,6 +315,7 @@ static const unsigned int memcg_node_stat_items[] = {
 	PGDEMOTE_KSWAPD,
 	PGDEMOTE_DIRECT,
 	PGDEMOTE_KHUGEPAGED,
+	PGDEMOTE_PROACTIVE,
 #ifdef CONFIG_HUGETLB_PAGE
 	NR_HUGETLB,
 #endif
@@ -431,9 +432,11 @@ static const unsigned int memcg_vm_event_stat[] = {
 	PGSCAN_KSWAPD,
 	PGSCAN_DIRECT,
 	PGSCAN_KHUGEPAGED,
+	PGSCAN_PROACTIVE,
 	PGSTEAL_KSWAPD,
 	PGSTEAL_DIRECT,
 	PGSTEAL_KHUGEPAGED,
+	PGSTEAL_PROACTIVE,
 	PGFAULT,
 	PGMAJFAULT,
 	PGREFILL,
@@ -706,6 +709,7 @@ void __mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx,
 	trace_mod_memcg_state(memcg, idx, val);
 }
 
+#ifdef CONFIG_MEMCG_V1
 /* idx can be of type enum memcg_stat_item or node_stat_item. */
 unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx)
 {
@@ -722,6 +726,7 @@ unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx)
 #endif
 	return x;
 }
+#endif
 
 static void __mod_memcg_lruvec_state(struct lruvec *lruvec,
 				     enum node_stat_item idx,
@@ -869,6 +874,7 @@ unsigned long memcg_events(struct mem_cgroup *memcg, int event)
 	return READ_ONCE(memcg->vmstats->events[i]);
 }
 
+#ifdef CONFIG_MEMCG_V1
 unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
 {
 	int i = memcg_events_index(event);
@@ -878,6 +884,7 @@ unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
 
 	return READ_ONCE(memcg->vmstats->events_local[i]);
 }
+#endif
 
 struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
 {
@@ -1390,6 +1397,7 @@ static const struct memory_stat memory_stats[] = {
 	{ "pgdemote_kswapd",	PGDEMOTE_KSWAPD },
 	{ "pgdemote_direct",	PGDEMOTE_DIRECT },
 	{ "pgdemote_khugepaged",	PGDEMOTE_KHUGEPAGED },
+	{ "pgdemote_proactive",	PGDEMOTE_PROACTIVE },
 #ifdef CONFIG_NUMA_BALANCING
 	{ "pgpromote_success",	PGPROMOTE_SUCCESS },
 #endif
@@ -1432,6 +1440,7 @@ static int memcg_page_state_output_unit(int item)
 	case PGDEMOTE_KSWAPD:
 	case PGDEMOTE_DIRECT:
 	case PGDEMOTE_KHUGEPAGED:
+	case PGDEMOTE_PROACTIVE:
 #ifdef CONFIG_NUMA_BALANCING
 	case PGPROMOTE_SUCCESS:
 #endif
@@ -1447,11 +1456,13 @@ unsigned long memcg_page_state_output(struct mem_cgroup *memcg, int item)
 	       memcg_page_state_output_unit(item);
 }
 
+#ifdef CONFIG_MEMCG_V1
 unsigned long memcg_page_state_local_output(struct mem_cgroup *memcg, int item)
 {
 	return memcg_page_state_local(memcg, item) *
 	       memcg_page_state_output_unit(item);
 }
+#endif
 
 #ifdef CONFIG_HUGETLB_PAGE
 static bool memcg_accounts_hugetlb(void)
@@ -1503,10 +1514,12 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
 	seq_buf_printf(s, "pgscan %lu\n",
 		       memcg_events(memcg, PGSCAN_KSWAPD) +
 		       memcg_events(memcg, PGSCAN_DIRECT) +
+		       memcg_events(memcg, PGSCAN_PROACTIVE) +
 		       memcg_events(memcg, PGSCAN_KHUGEPAGED));
 	seq_buf_printf(s, "pgsteal %lu\n",
 		       memcg_events(memcg, PGSTEAL_KSWAPD) +
 		       memcg_events(memcg, PGSTEAL_DIRECT) +
+		       memcg_events(memcg, PGSTEAL_PROACTIVE) +
 		       memcg_events(memcg, PGSTEAL_KHUGEPAGED));
 
 	for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) {
@@ -1566,16 +1579,23 @@ void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg)
 	/* Use static buffer, for the caller is holding oom_lock. */
 	static char buf[SEQ_BUF_SIZE];
 	struct seq_buf s;
+	unsigned long memory_failcnt;
 
 	lockdep_assert_held(&oom_lock);
 
+	if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
+		memory_failcnt = atomic_long_read(&memcg->memory_events[MEMCG_MAX]);
+	else
+		memory_failcnt = memcg->memory.failcnt;
+
 	pr_info("memory: usage %llukB, limit %llukB, failcnt %lu\n",
 		K((u64)page_counter_read(&memcg->memory)),
-		K((u64)READ_ONCE(memcg->memory.max)), memcg->memory.failcnt);
+		K((u64)READ_ONCE(memcg->memory.max)), memory_failcnt);
 	if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
 		pr_info("swap: usage %llukB, limit %llukB, failcnt %lu\n",
 			K((u64)page_counter_read(&memcg->swap)),
-			K((u64)READ_ONCE(memcg->swap.max)), memcg->swap.failcnt);
+			K((u64)READ_ONCE(memcg->swap.max)),
+			atomic_long_read(&memcg->memory_events[MEMCG_SWAP_MAX]));
 #ifdef CONFIG_MEMCG_V1
 	else {
 		pr_info("memory+swap: usage %llukB, limit %llukB, failcnt %lu\n",
@@ -1739,7 +1759,7 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg)
 }
 
 struct memcg_stock_pcp {
-	local_lock_t stock_lock;
+	localtry_lock_t stock_lock;
 	struct mem_cgroup *cached; /* this never be root cgroup */
 	unsigned int nr_pages;
 
@@ -1754,7 +1774,7 @@ struct memcg_stock_pcp {
 #define FLUSHING_CACHED_CHARGE	0
 };
 static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock) = {
-	.stock_lock = INIT_LOCAL_LOCK(stock_lock),
+	.stock_lock = INIT_LOCALTRY_LOCK(stock_lock),
 };
 static DEFINE_MUTEX(percpu_charge_mutex);
 
@@ -1766,6 +1786,7 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
  * consume_stock: Try to consume stocked charge on this cpu.
  * @memcg: memcg to consume from.
  * @nr_pages: how many pages to charge.
+ * @gfp_mask: allocation mask.
  *
  * The charges will only happen if @memcg matches the current cpu's memcg
 * stock, and at least @nr_pages are available in that stock.  Failure to
@@ -1773,7 +1794,8 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
 *
 * returns true if successful, false otherwise.
 */
-static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages,
+			  gfp_t gfp_mask)
 {
 	struct memcg_stock_pcp *stock;
 	unsigned int stock_pages;
@@ -1783,7 +1805,11 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 	if (nr_pages > MEMCG_CHARGE_BATCH)
 		return ret;
 
-	local_lock_irqsave(&memcg_stock.stock_lock, flags);
+	if (!localtry_trylock_irqsave(&memcg_stock.stock_lock, flags)) {
+		if (!gfpflags_allow_spinning(gfp_mask))
+			return ret;
+		localtry_lock_irqsave(&memcg_stock.stock_lock, flags);
+	}
 
 	stock = this_cpu_ptr(&memcg_stock);
 	stock_pages = READ_ONCE(stock->nr_pages);
@@ -1792,7 +1818,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 		ret = true;
 	}
 
-	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+	localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags);
 
 	return ret;
 }
@@ -1831,14 +1857,14 @@ static void drain_local_stock(struct work_struct *dummy)
 	 * drain_stock races is that we always operate on local CPU stock
 	 * here with IRQ disabled
 	 */
-	local_lock_irqsave(&memcg_stock.stock_lock, flags);
+	localtry_lock_irqsave(&memcg_stock.stock_lock, flags);
 
 	stock = this_cpu_ptr(&memcg_stock);
 	old = drain_obj_stock(stock);
 	drain_stock(stock);
 	clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
 
-	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+	localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags);
 	obj_cgroup_put(old);
 }
 
@@ -1868,9 +1894,20 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
 	unsigned long flags;
 
-	local_lock_irqsave(&memcg_stock.stock_lock, flags);
+	if (!localtry_trylock_irqsave(&memcg_stock.stock_lock, flags)) {
+		/*
+		 * In case of unlikely failure to lock percpu stock_lock
+		 * uncharge memcg directly.
+		 */
+		if (mem_cgroup_is_root(memcg))
+			return;
+		page_counter_uncharge(&memcg->memory, nr_pages);
+		if (do_memsw_account())
+			page_counter_uncharge(&memcg->memsw, nr_pages);
+		return;
+	}
 	__refill_stock(memcg, nr_pages);
-	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+	localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags);
 }
 
 /*
@@ -1921,9 +1958,18 @@ void drain_all_stock(struct mem_cgroup *root_memcg)
 static int memcg_hotplug_cpu_dead(unsigned int cpu)
 {
 	struct memcg_stock_pcp *stock;
+	struct obj_cgroup *old;
+	unsigned long flags;
 
 	stock = &per_cpu(memcg_stock, cpu);
+
+	/* drain_obj_stock requires stock_lock */
+	localtry_lock_irqsave(&memcg_stock.stock_lock, flags);
+	old = drain_obj_stock(stock);
+	localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+
 	drain_stock(stock);
+	obj_cgroup_put(old);
 
 	return 0;
 }
@@ -2198,8 +2244,8 @@ out:
 	css_put(&memcg->css);
 }
 
-int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
-		     unsigned int nr_pages)
+static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
+			    unsigned int nr_pages)
 {
 	unsigned int batch = max(MEMCG_CHARGE_BATCH, nr_pages);
 	int nr_retries = MAX_RECLAIM_RETRIES;
@@ -2213,9 +2259,13 @@ int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	unsigned long pflags;
 
 retry:
-	if (consume_stock(memcg, nr_pages))
+	if (consume_stock(memcg, nr_pages, gfp_mask))
 		return 0;
 
+	if (!gfpflags_allow_spinning(gfp_mask))
+		/* Avoid the refill and flush of the older stock */
+		batch = nr_pages;
+
 	if (!do_memsw_account() ||
 	    page_counter_try_charge(&memcg->memsw, batch, &counter)) {
 		if (page_counter_try_charge(&memcg->memory, batch, &counter))
@@ -2388,6 +2438,15 @@ done_restock:
 	return 0;
 }
 
+static inline int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
+			     unsigned int nr_pages)
+{
+	if (mem_cgroup_is_root(memcg))
+		return 0;
+
+	return try_charge_memcg(memcg, gfp_mask, nr_pages);
+}
+
 static void commit_charge(struct folio *folio, struct mem_cgroup *memcg)
 {
 	VM_BUG_ON_FOLIO(folio_memcg_charged(folio), folio);
@@ -2612,7 +2671,8 @@ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg,
 	mod_memcg_state(memcg, MEMCG_KMEM, -nr_pages);
 	memcg1_account_kmem(memcg, -nr_pages);
-	refill_stock(memcg, nr_pages);
+	if (!mem_cgroup_is_root(memcg))
+		refill_stock(memcg, nr_pages);
 
 	css_put(&memcg->css);
 }
@@ -2645,6 +2705,23 @@ out:
 	return ret;
 }
 
+static struct obj_cgroup *page_objcg(const struct page *page)
+{
+	unsigned long memcg_data = page->memcg_data;
+
+	if (mem_cgroup_disabled() || !memcg_data)
+		return NULL;
+
+	VM_BUG_ON_PAGE((memcg_data & OBJEXTS_FLAGS_MASK) != MEMCG_DATA_KMEM,
+		       page);
+	return (struct obj_cgroup *)(memcg_data - MEMCG_DATA_KMEM);
+}
+
+static void page_set_objcg(struct page *page, const struct obj_cgroup *objcg)
+{
+	page->memcg_data = (unsigned long)objcg | MEMCG_DATA_KMEM;
+}
+
 /**
  * __memcg_kmem_charge_page: charge a kmem page to the current memory cgroup
  * @page: page to charge
@@ -2663,8 +2740,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
 		ret = obj_cgroup_charge_pages(objcg, gfp, 1 << order);
 		if (!ret) {
 			obj_cgroup_get(objcg);
-			page->memcg_data = (unsigned long)objcg |
-					   MEMCG_DATA_KMEM;
+			page_set_objcg(page, objcg);
 			return 0;
 		}
 	}
@@ -2678,19 +2754,31 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
  */
 void __memcg_kmem_uncharge_page(struct page *page, int order)
 {
-	struct folio *folio = page_folio(page);
-	struct obj_cgroup *objcg;
+	struct obj_cgroup *objcg = page_objcg(page);
 	unsigned int nr_pages = 1 << order;
 
-	if (!folio_memcg_kmem(folio))
+	if (!objcg)
 		return;
 
-	objcg = __folio_objcg(folio);
 	obj_cgroup_uncharge_pages(objcg, nr_pages);
-	folio->memcg_data = 0;
+	page->memcg_data = 0;
 	obj_cgroup_put(objcg);
 }
 
+/* Replace the stock objcg with objcg, return the old objcg */
+static struct obj_cgroup *replace_stock_objcg(struct memcg_stock_pcp *stock,
+					      struct obj_cgroup *objcg)
+{
+	struct obj_cgroup *old = NULL;
+
+	old = drain_obj_stock(stock);
+	obj_cgroup_get(objcg);
+	stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes)
+			? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0;
+	WRITE_ONCE(stock->cached_objcg, objcg);
+	return old;
+}
+
 static void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
 			    enum node_stat_item idx, int nr)
 {
@@ -2699,7 +2787,7 @@ static void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
 	unsigned long flags;
 	int *bytes;
 
-	local_lock_irqsave(&memcg_stock.stock_lock, flags);
+	localtry_lock_irqsave(&memcg_stock.stock_lock, flags);
 	stock = this_cpu_ptr(&memcg_stock);
 
 	/*
@@ -2708,11 +2796,7 @@ static void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
 	 * changes.
 	 */
 	if (READ_ONCE(stock->cached_objcg) != objcg) {
-		old = drain_obj_stock(stock);
-		obj_cgroup_get(objcg);
-		stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes)
-				? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0;
-		WRITE_ONCE(stock->cached_objcg, objcg);
+		old = replace_stock_objcg(stock, objcg);
 		stock->cached_pgdat = pgdat;
 	} else if (stock->cached_pgdat != pgdat) {
 		/* Flush the existing cached vmstat data */
@@ -2752,7 +2836,7 @@ static void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
 	if (nr)
 		__mod_objcg_mlstate(objcg, pgdat, idx, nr);
 
-	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+	localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags);
 	obj_cgroup_put(old);
 }
 
@@ -2762,7 +2846,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
 	unsigned long flags;
 	bool ret = false;
 
-	local_lock_irqsave(&memcg_stock.stock_lock, flags);
+	localtry_lock_irqsave(&memcg_stock.stock_lock, flags);
 
 	stock = this_cpu_ptr(&memcg_stock);
 	if (objcg == READ_ONCE(stock->cached_objcg) && stock->nr_bytes >= nr_bytes) {
@@ -2770,7 +2854,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
 		ret = true;
 	}
 
-	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+	localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags);
 
 	return ret;
 }
@@ -2862,15 +2946,11 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
 	unsigned long flags;
 	unsigned int nr_pages = 0;
 
-	local_lock_irqsave(&memcg_stock.stock_lock, flags);
+	localtry_lock_irqsave(&memcg_stock.stock_lock, flags);
 
 	stock = this_cpu_ptr(&memcg_stock);
 	if (READ_ONCE(stock->cached_objcg) != objcg) { /* reset if necessary */
-		old = drain_obj_stock(stock);
-		obj_cgroup_get(objcg);
-		WRITE_ONCE(stock->cached_objcg, objcg);
-		stock->nr_bytes = atomic_read(&objcg->nr_charged_bytes)
-				? atomic_xchg(&objcg->nr_charged_bytes, 0) : 0;
+		old = replace_stock_objcg(stock, objcg);
 		allow_uncharge = true;	/* Allow uncharge when objcg changes */
 	}
 	stock->nr_bytes += nr_bytes;
 
@@ -2880,7 +2960,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
 		stock->nr_bytes &= (PAGE_SIZE - 1);
 	}
 
-	local_unlock_irqrestore(&memcg_stock.stock_lock, flags);
+	localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags);
 	obj_cgroup_put(old);
 
 	if (nr_pages)
@@ -3028,25 +3108,33 @@ void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
 }
 
 /*
- * Because folio_memcg(head) is not set on tails, set it now.
+ * The objcg is only set on the first page, so transfer it to all the
+ * other pages.
  */
-void split_page_memcg(struct page *head, int old_order, int new_order)
+void split_page_memcg(struct page *page, unsigned order)
 {
-	struct folio *folio = page_folio(head);
-	int i;
-	unsigned int old_nr = 1 << old_order;
-	unsigned int new_nr = 1 << new_order;
+	struct obj_cgroup *objcg = page_objcg(page);
+	unsigned int i, nr = 1 << order;
 
-	if (mem_cgroup_disabled() || !folio_memcg_charged(folio))
+	if (!objcg)
 		return;
 
-	for (i = new_nr; i < old_nr; i += new_nr)
-		folio_page(folio, i)->memcg_data = folio->memcg_data;
+	for (i = 1; i < nr; i++)
+		page_set_objcg(&page[i], objcg);
 
-	if (folio_memcg_kmem(folio))
-		obj_cgroup_get_many(__folio_objcg(folio), old_nr / new_nr - 1);
-	else
-		css_get_many(&folio_memcg(folio)->css, old_nr / new_nr - 1);
+	obj_cgroup_get_many(objcg, nr - 1);
+}
+
+void folio_split_memcg_refs(struct folio *folio, unsigned old_order,
+			    unsigned new_order)
+{
+	unsigned new_refs;
+
+	if (mem_cgroup_disabled() || !folio_memcg_charged(folio))
+		return;
+
+	new_refs = (1 << (old_order - new_order)) - 1;
+	css_get_many(&__folio_memcg(folio)->css, new_refs);
 }
 
 unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
@@ -3374,7 +3462,7 @@ void __maybe_unused mem_cgroup_id_get_many(struct mem_cgroup *memcg,
 	refcount_add(n, &memcg->id.ref);
 }
 
-void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
+static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
 {
 	if (refcount_sub_and_test(n, &memcg->id.ref)) {
 		mem_cgroup_id_remove(memcg);
@@ -3389,6 +3477,24 @@ static inline void mem_cgroup_id_put(struct mem_cgroup *memcg)
 	mem_cgroup_id_put_many(memcg, 1);
 }
 
+struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
+{
+	while (!refcount_inc_not_zero(&memcg->id.ref)) {
+		/*
+		 * The root cgroup cannot be destroyed, so its refcount must
+		 * always be >= 1.
+		 */
+		if (WARN_ON_ONCE(mem_cgroup_is_root(memcg))) {
+			VM_BUG_ON(1);
+			break;
+		}
+		memcg = parent_mem_cgroup(memcg);
+		if (!memcg)
+			memcg = root_mem_cgroup;
+	}
+	return memcg;
+}
+
 /**
  * mem_cgroup_from_id - look up a memcg from a memcg id
  * @id: the memcg id to look up
@@ -3424,6 +3530,16 @@ struct mem_cgroup *mem_cgroup_get_from_ino(unsigned long ino)
 }
 #endif
 
+static void free_mem_cgroup_per_node_info(struct mem_cgroup_per_node *pn)
+{
+	if (!pn)
+		return;
+
+	free_percpu(pn->lruvec_stats_percpu);
+	kfree(pn->lruvec_stats);
+	kfree(pn);
+}
+
 static bool alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 {
 	struct mem_cgroup_per_node *pn;
@@ -3448,23 +3564,10 @@ static bool alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 	memcg->nodeinfo[node] = pn;
 	return true;
 fail:
-	kfree(pn->lruvec_stats);
-	kfree(pn);
+	free_mem_cgroup_per_node_info(pn);
 	return false;
 }
 
-static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
-{
-	struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
-
-	if (!pn)
-		return;
-
-	free_percpu(pn->lruvec_stats_percpu);
-	kfree(pn->lruvec_stats);
-	kfree(pn);
-}
-
 static void __mem_cgroup_free(struct mem_cgroup *memcg)
 {
 	int node;
@@ -3472,7 +3575,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 	obj_cgroup_put(memcg->orig_objcg);
 
 	for_each_node(node)
-		free_mem_cgroup_per_node_info(memcg, node);
+		free_mem_cgroup_per_node_info(memcg->nodeinfo[node]);
 	memcg1_free_events(memcg);
 	kfree(memcg->vmstats);
 	free_percpu(memcg->vmstats_percpu);
@@ -3565,6 +3668,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 {
 	struct mem_cgroup *parent = mem_cgroup_from_css(parent_css);
 	struct mem_cgroup *memcg, *old_memcg;
+	bool memcg_on_dfl = cgroup_subsys_on_dfl(memory_cgrp_subsys);
 
 	old_memcg = set_active_memcg(parent);
 	memcg = mem_cgroup_alloc(parent);
@@ -3582,9 +3686,10 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	if (parent) {
 		WRITE_ONCE(memcg->swappiness, mem_cgroup_swappiness(parent));
 
-		page_counter_init(&memcg->memory, &parent->memory, true);
+		page_counter_init(&memcg->memory, &parent->memory, memcg_on_dfl);
 		page_counter_init(&memcg->swap, &parent->swap, false);
 #ifdef CONFIG_MEMCG_V1
+		memcg->memory.track_failcnt = !memcg_on_dfl;
 		WRITE_ONCE(memcg->oom_kill_disable, READ_ONCE(parent->oom_kill_disable));
 		page_counter_init(&memcg->kmem, &parent->kmem, false);
 		page_counter_init(&memcg->tcpmem, &parent->tcpmem, false);
@@ -3602,7 +3707,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 		return &memcg->css;
 	}
 
-	if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
+	if (memcg_on_dfl && !cgroup_memory_nosocket)
 		static_branch_inc(&memcg_sockets_enabled_key);
 
 	if (!cgroup_memory_nobpf)
@@ -4004,7 +4109,7 @@ static ssize_t peak_write(struct kernfs_open_file *of, char *buf, size_t nbytes,
 			WRITE_ONCE(peer_ctx->value, usage);
 
 	/* initial write, register watcher */
-	if (ofp->value == -1)
+	if (ofp->value == OFP_PEAK_UNSET)
 		list_add(&ofp->list, watchers);
 
 	WRITE_ONCE(ofp->value, usage);
@@ -4577,40 +4682,6 @@ int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm,
 	return ret;
 }
 
-/*
- * mem_cgroup_swapin_uncharge_swap - uncharge swap slot
- * @entry: the first swap entry for which the pages are charged
- * @nr_pages: number of pages which will be uncharged
- *
- * Call this function after successfully adding the charged page to swapcache.
- * - * Note: This function assumes the page for which swap slot is being uncharged - * is order 0 page. - */ -void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry, unsigned int nr_pages) -{ - /* - * Cgroup1's unified memory+swap counter has been charged with the - * new swapcache page, finish the transfer by uncharging the swap - * slot. The swap slot would also get uncharged when it dies, but - * it can stick around indefinitely and we'd count the page twice - * the entire time. - * - * Cgroup2 has separate resource counters for memory and swap, - * so this is a non-issue here. Memory and swap charge lifetimes - * correspond 1:1 to page and swap slot lifetimes: we charge the - * page to memory here, and uncharge swap when the slot is freed. - */ - if (do_memsw_account()) { - /* - * The swap entry might not get freed for a long time, - * let's not wait for it. The page already received a - * memory+swap charge, drop the swap entry duplicate. - */ - mem_cgroup_uncharge_swap(entry, nr_pages); - } -} - struct uncharge_gather { struct mem_cgroup *memcg; unsigned long nr_memory; @@ -4861,7 +4932,7 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages, if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) return memcg1_charge_skmem(memcg, nr_pages, gfp_mask); - if (try_charge(memcg, gfp_mask, nr_pages) == 0) { + if (try_charge_memcg(memcg, gfp_mask, nr_pages) == 0) { mod_memcg_state(memcg, MEMCG_SOCK, nr_pages); return true; } @@ -4936,81 +5007,6 @@ static int __init mem_cgroup_init(void) subsys_initcall(mem_cgroup_init); #ifdef CONFIG_SWAP -static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg) -{ - while (!refcount_inc_not_zero(&memcg->id.ref)) { - /* - * The root cgroup cannot be destroyed, so it's refcount must - * always be >= 1. - */ - if (WARN_ON_ONCE(mem_cgroup_is_root(memcg))) { - VM_BUG_ON(1); - break; - } - memcg = parent_mem_cgroup(memcg); - if (!memcg) - memcg = root_mem_cgroup; - } - return memcg; -} - -/** - * mem_cgroup_swapout - transfer a memsw charge to swap - * @folio: folio whose memsw charge to transfer - * @entry: swap entry to move the charge to - * - * Transfer the memsw charge of @folio to @entry. - */ -void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry) -{ - struct mem_cgroup *memcg, *swap_memcg; - unsigned int nr_entries; - - VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); - VM_BUG_ON_FOLIO(folio_ref_count(folio), folio); - - if (mem_cgroup_disabled()) - return; - - if (!do_memsw_account()) - return; - - memcg = folio_memcg(folio); - - VM_WARN_ON_ONCE_FOLIO(!memcg, folio); - if (!memcg) - return; - - /* - * In case the memcg owning these pages has been offlined and doesn't - * have an ID allocated to it anymore, charge the closest online - * ancestor for the swap instead and transfer the memory+swap charge. 
-	 */
-	swap_memcg = mem_cgroup_id_get_online(memcg);
-	nr_entries = folio_nr_pages(folio);
-	/* Get references for the tail pages, too */
-	if (nr_entries > 1)
-		mem_cgroup_id_get_many(swap_memcg, nr_entries - 1);
-	mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries);
-
-	swap_cgroup_record(folio, entry);
-
-	folio_unqueue_deferred_split(folio);
-	folio->memcg_data = 0;
-
-	if (!mem_cgroup_is_root(memcg))
-		page_counter_uncharge(&memcg->memory, nr_entries);
-
-	if (memcg != swap_memcg) {
-		if (!mem_cgroup_is_root(swap_memcg))
-			page_counter_charge(&swap_memcg->memsw, nr_entries);
-		page_counter_uncharge(&memcg->memsw, nr_entries);
-	}
-
-	memcg1_swapout(folio, memcg);
-	css_put(&memcg->css);
-}
-
 /**
  * __mem_cgroup_try_charge_swap - try charging swap space for a folio
  * @folio: folio being added to swap
@@ -5055,7 +5051,7 @@ int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry)
 		mem_cgroup_id_get_many(memcg, nr_pages - 1);
 	mod_memcg_state(memcg, MEMCG_SWAP, nr_pages);
 
-	swap_cgroup_record(folio, entry);
+	swap_cgroup_record(folio, mem_cgroup_id(memcg), entry);
 
 	return 0;
 }
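A note on the stock_lock conversion in this diff. Replacing local_lock_t with localtry_lock_t lets consume_stock() and refill_stock() try to take the per-CPU stock lock and fall back when it is contended: a caller whose gfp_mask fails gfpflags_allow_spinning() bails out to the slow path (or uncharges the page counters directly) instead of spinning. Below is a minimal userspace sketch of that trylock-with-fallback shape, not kernel code: pthread spinlocks stand in for localtry_lock_t, and consume_stock_sketch()/allow_spinning are made-up names for illustration.

/*
 * Minimal userspace sketch of the trylock-with-fallback pattern used by
 * consume_stock() above.  pthread_spin_trylock() stands in for
 * localtry_trylock_irqsave(); "allow_spinning" stands in for
 * gfpflags_allow_spinning().  Build with: cc sketch.c -lpthread
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_spinlock_t stock_lock;
static unsigned int stocked_pages = 64;	/* pretend per-CPU stock */

/* Try to take @nr_pages from the stock; never spin if forbidden. */
static bool consume_stock_sketch(unsigned int nr_pages, bool allow_spinning)
{
	bool ret = false;

	if (pthread_spin_trylock(&stock_lock)) {
		/*
		 * Lock is contended.  A caller that must not spin gives
		 * up and takes the slow path; everyone else waits.
		 */
		if (!allow_spinning)
			return false;
		pthread_spin_lock(&stock_lock);
	}

	if (stocked_pages >= nr_pages) {
		stocked_pages -= nr_pages;
		ret = true;
	}

	pthread_spin_unlock(&stock_lock);
	return ret;
}

int main(void)
{
	pthread_spin_init(&stock_lock, PTHREAD_PROCESS_PRIVATE);
	printf("consumed: %d\n", consume_stock_sketch(32, true));
	printf("pages left: %u\n", stocked_pages);
	pthread_spin_destroy(&stock_lock);
	return 0;
}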
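The new page_objcg()/page_set_objcg() helpers rely on pointer tagging: page->memcg_data holds an obj_cgroup pointer with the MEMCG_DATA_KMEM flag folded into its low bits, so subtracting the flag recovers the untagged pointer. Here is a standalone sketch of the same encoding; the *_sketch names and the flag value are stand-ins for the kernel's types, not its actual definitions.

/* Self-contained sketch of the memcg_data pointer-tagging scheme. */
#include <assert.h>
#include <stdio.h>

#define MEMCG_DATA_KMEM_SKETCH	0x1UL	/* low bit: value is an objcg pointer */

struct obj_cgroup_sketch { int id; };
struct page_sketch { unsigned long memcg_data; };

static void page_set_objcg_sketch(struct page_sketch *page,
				  struct obj_cgroup_sketch *objcg)
{
	/* Pointers are at least word aligned, so the low bit is free. */
	page->memcg_data = (unsigned long)objcg | MEMCG_DATA_KMEM_SKETCH;
}

static struct obj_cgroup_sketch *page_objcg_sketch(struct page_sketch *page)
{
	unsigned long memcg_data = page->memcg_data;

	if (!memcg_data)
		return NULL;
	/* Subtracting the flag recovers the untagged pointer. */
	assert(memcg_data & MEMCG_DATA_KMEM_SKETCH);
	return (struct obj_cgroup_sketch *)(memcg_data - MEMCG_DATA_KMEM_SKETCH);
}

int main(void)
{
	struct obj_cgroup_sketch objcg = { .id = 42 };
	struct page_sketch page = { 0 };

	page_set_objcg_sketch(&page, &objcg);
	printf("objcg id: %d\n", page_objcg_sketch(&page)->id);
	return 0;
}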
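Finally, the refcount arithmetic in the new folio_split_memcg_refs() is worth spelling out: splitting an order old_order folio into order new_order pieces yields 2^(old_order - new_order) folios, one of which inherits the original css reference, hence (1 << (old_order - new_order)) - 1 additional references. A tiny self-contained check of that arithmetic (the helper name is hypothetical):

#include <stdio.h>

/* Same expression the patch uses: (1 << (old - new)) - 1. */
static unsigned int new_memcg_refs(unsigned int old_order, unsigned int new_order)
{
	return (1u << (old_order - new_order)) - 1;
}

int main(void)
{
	printf("order 4 -> 1: %u extra refs\n", new_memcg_refs(4, 1)); /* 7 */
	printf("order 9 -> 0: %u extra refs\n", new_memcg_refs(9, 0)); /* 511 */
	return 0;
}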