Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--  mm/memcontrol.c  246
1 file changed, 153 insertions(+), 93 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 772bac21d155..051b82ebf371 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -34,7 +34,7 @@
#include <linux/shmem_fs.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
-#include <linux/pagevec.h>
+#include <linux/folio_batch.h>
#include <linux/vm_event_item.h>
#include <linux/smp.h>
#include <linux/page-flags.h>
@@ -317,6 +317,7 @@ static const unsigned int memcg_node_stat_items[] = {
NR_SHMEM_THPS,
NR_FILE_THPS,
NR_ANON_THPS,
+ NR_VMALLOC,
NR_KERNEL_STACK_KB,
NR_PAGETABLE,
NR_SECONDARY_PAGETABLE,
@@ -330,6 +331,19 @@ static const unsigned int memcg_node_stat_items[] = {
PGDEMOTE_DIRECT,
PGDEMOTE_KHUGEPAGED,
PGDEMOTE_PROACTIVE,
+ PGSTEAL_KSWAPD,
+ PGSTEAL_DIRECT,
+ PGSTEAL_KHUGEPAGED,
+ PGSTEAL_PROACTIVE,
+ PGSTEAL_ANON,
+ PGSTEAL_FILE,
+ PGSCAN_KSWAPD,
+ PGSCAN_DIRECT,
+ PGSCAN_KHUGEPAGED,
+ PGSCAN_PROACTIVE,
+ PGSCAN_ANON,
+ PGSCAN_FILE,
+ PGREFILL,
#ifdef CONFIG_HUGETLB_PAGE
NR_HUGETLB,
#endif
@@ -339,10 +353,10 @@ static const unsigned int memcg_stat_items[] = {
MEMCG_SWAP,
MEMCG_SOCK,
MEMCG_PERCPU_B,
- MEMCG_VMALLOC,
MEMCG_KMEM,
MEMCG_ZSWAP_B,
MEMCG_ZSWAPPED,
+ MEMCG_ZSWAP_INCOMP,
};
#define NR_MEMCG_NODE_STAT_ITEMS ARRAY_SIZE(memcg_node_stat_items)
@@ -443,17 +457,8 @@ static const unsigned int memcg_vm_event_stat[] = {
#endif
PSWPIN,
PSWPOUT,
- PGSCAN_KSWAPD,
- PGSCAN_DIRECT,
- PGSCAN_KHUGEPAGED,
- PGSCAN_PROACTIVE,
- PGSTEAL_KSWAPD,
- PGSTEAL_DIRECT,
- PGSTEAL_KHUGEPAGED,
- PGSTEAL_PROACTIVE,
PGFAULT,
PGMAJFAULT,
- PGREFILL,
PGACTIVATE,
PGDEACTIVATE,
PGLAZYFREE,
@@ -1359,11 +1364,12 @@ static const struct memory_stat memory_stats[] = {
{ "sec_pagetables", NR_SECONDARY_PAGETABLE },
{ "percpu", MEMCG_PERCPU_B },
{ "sock", MEMCG_SOCK },
- { "vmalloc", MEMCG_VMALLOC },
+ { "vmalloc", NR_VMALLOC },
{ "shmem", NR_SHMEM },
#ifdef CONFIG_ZSWAP
{ "zswap", MEMCG_ZSWAP_B },
{ "zswapped", MEMCG_ZSWAPPED },
+ { "zswap_incomp", MEMCG_ZSWAP_INCOMP },
#endif
{ "file_mapped", NR_FILE_MAPPED },
{ "file_dirty", NR_FILE_DIRTY },
@@ -1400,6 +1406,15 @@ static const struct memory_stat memory_stats[] = {
{ "pgdemote_direct", PGDEMOTE_DIRECT },
{ "pgdemote_khugepaged", PGDEMOTE_KHUGEPAGED },
{ "pgdemote_proactive", PGDEMOTE_PROACTIVE },
+ { "pgsteal_kswapd", PGSTEAL_KSWAPD },
+ { "pgsteal_direct", PGSTEAL_DIRECT },
+ { "pgsteal_khugepaged", PGSTEAL_KHUGEPAGED },
+ { "pgsteal_proactive", PGSTEAL_PROACTIVE },
+ { "pgscan_kswapd", PGSCAN_KSWAPD },
+ { "pgscan_direct", PGSCAN_DIRECT },
+ { "pgscan_khugepaged", PGSCAN_KHUGEPAGED },
+ { "pgscan_proactive", PGSCAN_PROACTIVE },
+ { "pgrefill", PGREFILL },
#ifdef CONFIG_NUMA_BALANCING
{ "pgpromote_success", PGPROMOTE_SUCCESS },
#endif
@@ -1443,6 +1458,15 @@ static int memcg_page_state_output_unit(int item)
case PGDEMOTE_DIRECT:
case PGDEMOTE_KHUGEPAGED:
case PGDEMOTE_PROACTIVE:
+ case PGSTEAL_KSWAPD:
+ case PGSTEAL_DIRECT:
+ case PGSTEAL_KHUGEPAGED:
+ case PGSTEAL_PROACTIVE:
+ case PGSCAN_KSWAPD:
+ case PGSCAN_DIRECT:
+ case PGSCAN_KHUGEPAGED:
+ case PGSCAN_PROACTIVE:
+ case PGREFILL:
#ifdef CONFIG_NUMA_BALANCING
case PGPROMOTE_SUCCESS:
#endif
@@ -1514,15 +1538,15 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
/* Accumulated memory events */
seq_buf_printf(s, "pgscan %lu\n",
- memcg_events(memcg, PGSCAN_KSWAPD) +
- memcg_events(memcg, PGSCAN_DIRECT) +
- memcg_events(memcg, PGSCAN_PROACTIVE) +
- memcg_events(memcg, PGSCAN_KHUGEPAGED));
+ memcg_page_state(memcg, PGSCAN_KSWAPD) +
+ memcg_page_state(memcg, PGSCAN_DIRECT) +
+ memcg_page_state(memcg, PGSCAN_PROACTIVE) +
+ memcg_page_state(memcg, PGSCAN_KHUGEPAGED));
seq_buf_printf(s, "pgsteal %lu\n",
- memcg_events(memcg, PGSTEAL_KSWAPD) +
- memcg_events(memcg, PGSTEAL_DIRECT) +
- memcg_events(memcg, PGSTEAL_PROACTIVE) +
- memcg_events(memcg, PGSTEAL_KHUGEPAGED));
+ memcg_page_state(memcg, PGSTEAL_KSWAPD) +
+ memcg_page_state(memcg, PGSTEAL_DIRECT) +
+ memcg_page_state(memcg, PGSTEAL_PROACTIVE) +
+ memcg_page_state(memcg, PGSTEAL_KHUGEPAGED));
for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) {
#ifdef CONFIG_MEMCG_V1
@@ -2361,7 +2385,7 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
struct page_counter *counter;
unsigned long nr_reclaimed;
bool passed_oom = false;
- unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP;
+ unsigned int reclaim_options;
bool drained = false;
bool raised_max_event = false;
unsigned long pflags;
@@ -2375,6 +2399,7 @@ retry:
/* Avoid the refill and flush of the older stock */
batch = nr_pages;
+ reclaim_options = MEMCG_RECLAIM_MAY_SWAP;
if (!do_memsw_account() ||
page_counter_try_charge(&memcg->memsw, batch, &counter)) {
if (page_counter_try_charge(&memcg->memory, batch, &counter))
@@ -2926,12 +2951,30 @@ void __memcg_kmem_uncharge_page(struct page *page, int order)
obj_cgroup_put(objcg);
}
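+/*
+ * Lock/unlock helpers for the per-CPU obj stock. trylock_stock() returns
+ * the locked stock, or NULL if local_trylock() fails; unlock_stock()
+ * accepts a NULL stock so callers can pass through either result.
+ */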
+static struct obj_stock_pcp *trylock_stock(void)
+{
+ if (local_trylock(&obj_stock.lock))
+ return this_cpu_ptr(&obj_stock);
+
+ return NULL;
+}
+
+static void unlock_stock(struct obj_stock_pcp *stock)
+{
+ if (stock)
+ local_unlock(&obj_stock.lock);
+}
+
+/* Call after __refill_obj_stock() to ensure stock->cached_objcg == objcg */
static void __account_obj_stock(struct obj_cgroup *objcg,
struct obj_stock_pcp *stock, int nr,
struct pglist_data *pgdat, enum node_stat_item idx)
{
int *bytes;
+ if (!stock || READ_ONCE(stock->cached_objcg) != objcg)
+ goto direct;
+
/*
* Save vmstat data in stock and skip vmstat array update unless
* accumulating over a page of vmstat data or when pgdat changes.
@@ -2971,29 +3014,35 @@ static void __account_obj_stock(struct obj_cgroup *objcg,
nr = 0;
}
}
+direct:
if (nr)
mod_objcg_mlstate(objcg, pgdat, idx, nr);
}
-static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
- struct pglist_data *pgdat, enum node_stat_item idx)
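+/* Consume @nr_bytes from @stock if it already caches @objcg; stock lock held. */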
+static bool __consume_obj_stock(struct obj_cgroup *objcg,
+ struct obj_stock_pcp *stock,
+ unsigned int nr_bytes)
+{
+ if (objcg == READ_ONCE(stock->cached_objcg) &&
+ stock->nr_bytes >= nr_bytes) {
+ stock->nr_bytes -= nr_bytes;
+ return true;
+ }
+
+ return false;
+}
+
+static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
{
struct obj_stock_pcp *stock;
bool ret = false;
- if (!local_trylock(&obj_stock.lock))
+ stock = trylock_stock();
+ if (!stock)
return ret;
- stock = this_cpu_ptr(&obj_stock);
- if (objcg == READ_ONCE(stock->cached_objcg) && stock->nr_bytes >= nr_bytes) {
- stock->nr_bytes -= nr_bytes;
- ret = true;
-
- if (pgdat)
- __account_obj_stock(objcg, stock, nr_bytes, pgdat, idx);
- }
-
- local_unlock(&obj_stock.lock);
+ ret = __consume_obj_stock(objcg, stock, nr_bytes);
+ unlock_stock(stock);
return ret;
}
@@ -3077,23 +3126,20 @@ static bool obj_stock_flush_required(struct obj_stock_pcp *stock,
return flush;
}
-static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
- bool allow_uncharge, int nr_acct, struct pglist_data *pgdat,
- enum node_stat_item idx)
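+/* @stock may be NULL (trylock failed): uncharge whole pages and push the sub-page remainder to objcg->nr_charged_bytes. */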
+static void __refill_obj_stock(struct obj_cgroup *objcg,
+ struct obj_stock_pcp *stock,
+ unsigned int nr_bytes,
+ bool allow_uncharge)
{
- struct obj_stock_pcp *stock;
unsigned int nr_pages = 0;
- if (!local_trylock(&obj_stock.lock)) {
- if (pgdat)
- mod_objcg_mlstate(objcg, pgdat, idx, nr_acct);
+ if (!stock) {
nr_pages = nr_bytes >> PAGE_SHIFT;
nr_bytes = nr_bytes & (PAGE_SIZE - 1);
atomic_add(nr_bytes, &objcg->nr_charged_bytes);
goto out;
}
- stock = this_cpu_ptr(&obj_stock);
if (READ_ONCE(stock->cached_objcg) != objcg) { /* reset if necessary */
drain_obj_stock(stock);
obj_cgroup_get(objcg);
@@ -3105,27 +3151,45 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
}
stock->nr_bytes += nr_bytes;
- if (pgdat)
- __account_obj_stock(objcg, stock, nr_acct, pgdat, idx);
-
if (allow_uncharge && (stock->nr_bytes > PAGE_SIZE)) {
nr_pages = stock->nr_bytes >> PAGE_SHIFT;
stock->nr_bytes &= (PAGE_SIZE - 1);
}
- local_unlock(&obj_stock.lock);
out:
if (nr_pages)
obj_cgroup_uncharge_pages(objcg, nr_pages);
}
-static int obj_cgroup_charge_account(struct obj_cgroup *objcg, gfp_t gfp, size_t size,
- struct pglist_data *pgdat, enum node_stat_item idx)
+static void refill_obj_stock(struct obj_cgroup *objcg,
+ unsigned int nr_bytes,
+ bool allow_uncharge)
+{
+ struct obj_stock_pcp *stock = trylock_stock();
+ __refill_obj_stock(objcg, stock, nr_bytes, allow_uncharge);
+ unlock_stock(stock);
+}
+
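+/* Charge PAGE_ALIGN(@size) and report the unused tail via @remainder on success. */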
+static int __obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp,
+ size_t size, size_t *remainder)
+{
+ size_t charge_size;
+ int ret;
+
+ charge_size = PAGE_ALIGN(size);
+ ret = obj_cgroup_charge_pages(objcg, gfp, charge_size >> PAGE_SHIFT);
+ if (!ret)
+ *remainder = charge_size - size;
+
+ return ret;
+}
+
+int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size)
{
- unsigned int nr_pages, nr_bytes;
+ size_t remainder;
int ret;
- if (likely(consume_obj_stock(objcg, size, pgdat, idx)))
+ if (likely(consume_obj_stock(objcg, size)))
return 0;
/*
@@ -3151,28 +3215,16 @@ static int obj_cgroup_charge_account(struct obj_cgroup *objcg, gfp_t gfp, size_t
* bytes is (sizeof(object) + PAGE_SIZE - 2) if there is no data
* race.
*/
- nr_pages = size >> PAGE_SHIFT;
- nr_bytes = size & (PAGE_SIZE - 1);
-
- if (nr_bytes)
- nr_pages += 1;
-
- ret = obj_cgroup_charge_pages(objcg, gfp, nr_pages);
- if (!ret && (nr_bytes || pgdat))
- refill_obj_stock(objcg, nr_bytes ? PAGE_SIZE - nr_bytes : 0,
- false, size, pgdat, idx);
+ ret = __obj_cgroup_charge(objcg, gfp, size, &remainder);
+ if (!ret && remainder)
+ refill_obj_stock(objcg, remainder, false);
return ret;
}
-int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size)
-{
- return obj_cgroup_charge_account(objcg, gfp, size, NULL, 0);
-}
-
void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
{
- refill_obj_stock(objcg, size, true, 0, NULL, 0);
+ refill_obj_stock(objcg, size, true);
}
static inline size_t obj_full_size(struct kmem_cache *s)
@@ -3187,6 +3239,7 @@ static inline size_t obj_full_size(struct kmem_cache *s)
bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
gfp_t flags, size_t size, void **p)
{
+ size_t obj_size = obj_full_size(s);
struct obj_cgroup *objcg;
struct slab *slab;
unsigned long off;
@@ -3227,6 +3280,7 @@ bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
for (i = 0; i < size; i++) {
unsigned long obj_exts;
struct slabobj_ext *obj_ext;
+ struct obj_stock_pcp *stock;
slab = virt_to_slab(p[i]);
@@ -3246,9 +3300,20 @@ bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
* TODO: we could batch this until slab_pgdat(slab) changes
* between iterations, with a more complicated undo
*/
- if (obj_cgroup_charge_account(objcg, flags, obj_full_size(s),
- slab_pgdat(slab), cache_vmstat_idx(s)))
- return false;
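+ /*
+  * Try the per-CPU stock first; on a miss, fall back to a
+  * page-granular charge and return the unused remainder to the
+  * stock before accounting the per-node slab stat.
+  */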
+ stock = trylock_stock();
+ if (!stock || !__consume_obj_stock(objcg, stock, obj_size)) {
+ size_t remainder;
+
+ unlock_stock(stock);
+ if (__obj_cgroup_charge(objcg, flags, obj_size, &remainder))
+ return false;
+ stock = trylock_stock();
+ if (remainder)
+ __refill_obj_stock(objcg, stock, remainder, false);
+ }
+ __account_obj_stock(objcg, stock, obj_size,
+ slab_pgdat(slab), cache_vmstat_idx(s));
+ unlock_stock(stock);
obj_exts = slab_obj_exts(slab);
get_slab_obj_exts(obj_exts);
@@ -3270,6 +3335,7 @@ void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
for (int i = 0; i < objects; i++) {
struct obj_cgroup *objcg;
struct slabobj_ext *obj_ext;
+ struct obj_stock_pcp *stock;
unsigned int off;
off = obj_to_index(s, slab, p[i]);
@@ -3279,8 +3345,13 @@ void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
continue;
obj_ext->objcg = NULL;
- refill_obj_stock(objcg, obj_size, true, -obj_size,
- slab_pgdat(slab), cache_vmstat_idx(s));
+
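+ /* Return the object's bytes to the stock and subtract its slab stat under one stock lock. */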
+ stock = trylock_stock();
+ __refill_obj_stock(objcg, stock, obj_size, true);
+ __account_obj_stock(objcg, stock, -obj_size,
+ slab_pgdat(slab), cache_vmstat_idx(s));
+ unlock_stock(stock);
+
obj_cgroup_put(objcg);
}
}
@@ -3612,13 +3683,7 @@ static void mem_cgroup_private_id_remove(struct mem_cgroup *memcg)
}
}
-void __maybe_unused mem_cgroup_private_id_get_many(struct mem_cgroup *memcg,
- unsigned int n)
-{
- refcount_add(n, &memcg->id.ref);
-}
-
-static void mem_cgroup_private_id_put_many(struct mem_cgroup *memcg, unsigned int n)
+static inline void mem_cgroup_private_id_put(struct mem_cgroup *memcg, unsigned int n)
{
if (refcount_sub_and_test(n, &memcg->id.ref)) {
mem_cgroup_private_id_remove(memcg);
@@ -3628,14 +3693,9 @@ static void mem_cgroup_private_id_put_many(struct mem_cgroup *memcg, unsigned in
}
}
-static inline void mem_cgroup_private_id_put(struct mem_cgroup *memcg)
+struct mem_cgroup *mem_cgroup_private_id_get_online(struct mem_cgroup *memcg, unsigned int n)
{
- mem_cgroup_private_id_put_many(memcg, 1);
-}
-
-struct mem_cgroup *mem_cgroup_private_id_get_online(struct mem_cgroup *memcg)
-{
- while (!refcount_inc_not_zero(&memcg->id.ref)) {
+ while (!refcount_add_not_zero(n, &memcg->id.ref)) {
/*
* The root cgroup cannot be destroyed, so it's refcount must
* always be >= 1.
@@ -3935,7 +3995,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
drain_all_stock(memcg);
- mem_cgroup_private_id_put(memcg);
+ mem_cgroup_private_id_put(memcg, 1);
}
static void mem_cgroup_css_released(struct cgroup_subsys_state *css)
@@ -5225,19 +5285,15 @@ int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry)
return 0;
}
- memcg = mem_cgroup_private_id_get_online(memcg);
+ memcg = mem_cgroup_private_id_get_online(memcg, nr_pages);
if (!mem_cgroup_is_root(memcg) &&
!page_counter_try_charge(&memcg->swap, nr_pages, &counter)) {
memcg_memory_event(memcg, MEMCG_SWAP_MAX);
memcg_memory_event(memcg, MEMCG_SWAP_FAIL);
- mem_cgroup_private_id_put(memcg);
+ mem_cgroup_private_id_put(memcg, nr_pages);
return -ENOMEM;
}
-
- /* Get references for the tail pages, too */
- if (nr_pages > 1)
- mem_cgroup_private_id_get_many(memcg, nr_pages - 1);
mod_memcg_state(memcg, MEMCG_SWAP, nr_pages);
swap_cgroup_record(folio, mem_cgroup_private_id(memcg), entry);
@@ -5266,7 +5322,7 @@ void __mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages)
page_counter_uncharge(&memcg->swap, nr_pages);
}
mod_memcg_state(memcg, MEMCG_SWAP, -nr_pages);
- mem_cgroup_private_id_put_many(memcg, nr_pages);
+ mem_cgroup_private_id_put(memcg, nr_pages);
}
rcu_read_unlock();
}
@@ -5513,6 +5569,8 @@ void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size)
memcg = obj_cgroup_memcg(objcg);
mod_memcg_state(memcg, MEMCG_ZSWAP_B, size);
mod_memcg_state(memcg, MEMCG_ZSWAPPED, 1);
+ if (size == PAGE_SIZE)
+ mod_memcg_state(memcg, MEMCG_ZSWAP_INCOMP, 1);
rcu_read_unlock();
}
@@ -5536,6 +5594,8 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
memcg = obj_cgroup_memcg(objcg);
mod_memcg_state(memcg, MEMCG_ZSWAP_B, -size);
mod_memcg_state(memcg, MEMCG_ZSWAPPED, -1);
+ if (size == PAGE_SIZE)
+ mod_memcg_state(memcg, MEMCG_ZSWAP_INCOMP, -1);
rcu_read_unlock();
}