From 0add0c77a9bd0ce7cd3b53894fb08154881402a4 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Thu, 29 Apr 2021 22:56:36 -0700 Subject: memcg: charge before adding to swapcache on swapin Currently the kernel adds the page, allocated for swapin, to the swapcache before charging the page. This is fine but now we want a per-memcg swapcache stat which is essential for folks who wants to transparently migrate from cgroup v1's memsw to cgroup v2's memory and swap counters. In addition charging a page before exposing it to other parts of the kernel is a step in the right direction. To correctly maintain the per-memcg swapcache stat, this patch has adopted to charge the page before adding it to swapcache. One challenge in this option is the failure case of add_to_swap_cache() on which we need to undo the mem_cgroup_charge(). Specifically undoing mem_cgroup_uncharge_swap() is not simple. To resolve the issue, this patch decouples the charging for swapin pages from mem_cgroup_charge(). Two new functions are introduced, mem_cgroup_swapin_charge_page() for just charging the swapin page and mem_cgroup_swapin_uncharge_swap() for uncharging the swap slot once the page has been successfully added to the swapcache. [shakeelb@google.com: set page->private before calling swap_readpage] Link: https://lkml.kernel.org/r/20210318015959.2986837-1-shakeelb@google.com Link: https://lkml.kernel.org/r/20210305212639.775498-1-shakeelb@google.com Signed-off-by: Shakeel Butt Acked-by: Roman Gushchin Acked-by: Johannes Weiner Acked-by: Hugh Dickins Tested-by: Heiko Carstens Cc: Michal Hocko Cc: Stephen Rothwell Cc: Christian Borntraeger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 117 +++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 74 insertions(+), 43 deletions(-) (limited to 'mm/memcontrol.c') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 5582e1531b43..6d0863db3e41 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6644,6 +6644,27 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root, atomic_long_read(&parent->memory.children_low_usage))); } +static int __mem_cgroup_charge(struct page *page, struct mem_cgroup *memcg, + gfp_t gfp) +{ + unsigned int nr_pages = thp_nr_pages(page); + int ret; + + ret = try_charge(memcg, gfp, nr_pages); + if (ret) + goto out; + + css_get(&memcg->css); + commit_charge(page, memcg); + + local_irq_disable(); + mem_cgroup_charge_statistics(memcg, page, nr_pages); + memcg_check_events(memcg, page); + local_irq_enable(); +out: + return ret; +} + /** * mem_cgroup_charge - charge a newly allocated page to a cgroup * @page: page to charge @@ -6653,55 +6674,71 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root, * Try to charge @page to the memcg that @mm belongs to, reclaiming * pages according to @gfp_mask if necessary. * + * Do not use this for pages allocated for swapin. + * * Returns 0 on success. Otherwise, an error code is returned. */ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { - unsigned int nr_pages = thp_nr_pages(page); - struct mem_cgroup *memcg = NULL; - int ret = 0; + struct mem_cgroup *memcg; + int ret; if (mem_cgroup_disabled()) - goto out; + return 0; - if (PageSwapCache(page)) { - swp_entry_t ent = { .val = page_private(page), }; - unsigned short id; + memcg = get_mem_cgroup_from_mm(mm); + ret = __mem_cgroup_charge(page, memcg, gfp_mask); + css_put(&memcg->css); - /* - * Every swap fault against a single page tries to charge the - * page, bail as early as possible. shmem_unuse() encounters - * already charged pages, too. page and memcg binding is - * protected by the page lock, which serializes swap cache - * removal, which in turn serializes uncharging. - */ - VM_BUG_ON_PAGE(!PageLocked(page), page); - if (page_memcg(compound_head(page))) - goto out; + return ret; +} - id = lookup_swap_cgroup_id(ent); - rcu_read_lock(); - memcg = mem_cgroup_from_id(id); - if (memcg && !css_tryget_online(&memcg->css)) - memcg = NULL; - rcu_read_unlock(); - } +/** + * mem_cgroup_swapin_charge_page - charge a newly allocated page for swapin + * @page: page to charge + * @mm: mm context of the victim + * @gfp: reclaim mode + * @entry: swap entry for which the page is allocated + * + * This function charges a page allocated for swapin. Please call this before + * adding the page to the swapcache. + * + * Returns 0 on success. Otherwise, an error code is returned. + */ +int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm, + gfp_t gfp, swp_entry_t entry) +{ + struct mem_cgroup *memcg; + unsigned short id; + int ret; - if (!memcg) + if (mem_cgroup_disabled()) + return 0; + + id = lookup_swap_cgroup_id(entry); + rcu_read_lock(); + memcg = mem_cgroup_from_id(id); + if (!memcg || !css_tryget_online(&memcg->css)) memcg = get_mem_cgroup_from_mm(mm); + rcu_read_unlock(); - ret = try_charge(memcg, gfp_mask, nr_pages); - if (ret) - goto out_put; + ret = __mem_cgroup_charge(page, memcg, gfp); - css_get(&memcg->css); - commit_charge(page, memcg); - - local_irq_disable(); - mem_cgroup_charge_statistics(memcg, page, nr_pages); - memcg_check_events(memcg, page); - local_irq_enable(); + css_put(&memcg->css); + return ret; +} +/* + * mem_cgroup_swapin_uncharge_swap - uncharge swap slot + * @entry: swap entry for which the page is charged + * + * Call this function after successfully adding the charged page to swapcache. + * + * Note: This function assumes the page for which swap slot is being uncharged + * is order 0 page. + */ +void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry) +{ /* * Cgroup1's unified memory+swap counter has been charged with the * new swapcache page, finish the transfer by uncharging the swap @@ -6714,20 +6751,14 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) * correspond 1:1 to page and swap slot lifetimes: we charge the * page to memory here, and uncharge swap when the slot is freed. */ - if (do_memsw_account() && PageSwapCache(page)) { - swp_entry_t entry = { .val = page_private(page) }; + if (!mem_cgroup_disabled() && do_memsw_account()) { /* * The swap entry might not get freed for a long time, * let's not wait for it. The page already received a * memory+swap charge, drop the swap entry duplicate. */ - mem_cgroup_uncharge_swap(entry, nr_pages); + mem_cgroup_uncharge_swap(entry, 1); } - -out_put: - css_put(&memcg->css); -out: - return ret; } struct uncharge_gather { -- cgit v1.2.3