author	Johannes Weiner <hannes@cmpxchg.org>	2022-05-19 14:08:53 -0700
committer	akpm <akpm@linux-foundation.org>	2022-05-19 14:08:53 -0700
commit	f4840ccfca25db225b3371a8f7b5770febee87c5 (patch)
tree	c35709fc53e78d36681620d17d410bcc0c5d5785 /mm/zswap.c
parent	f6498b776d280b30a4614d8261840961e993c2c8 (diff)
zswap: memcg accounting
Applications can currently escape their cgroup memory containment when zswap is enabled. This patch adds per-cgroup tracking and limiting of zswap backend memory to rectify this.

The existing cgroup2 memory.stat file is extended to show zswap statistics analogous to what's in meminfo and vmstat. Furthermore, two new control files, memory.zswap.current and memory.zswap.max, are added to allow tuning zswap usage on a per-workload basis. This is important since not all workloads benefit from zswap equally; some even suffer compared to disk swap when memory contents don't compress well. The optimal size of the zswap pool, and the threshold for writeback, also depend on the size of the workload's warm set.

The implementation doesn't use a traditional page_counter transaction. zswap is unconventional as a memory consumer in that we only know the amount of memory to charge once expensive compression has occurred. If zswap is disabled or the limit is already exceeded, we obviously don't want to compress page upon page only to reject them all. Instead, the limit is checked against current usage, then we compress and charge. This allows some limit overrun, but not enough to matter in practice.

[hannes@cmpxchg.org: fix for CONFIG_SLOB builds]
  Link: https://lkml.kernel.org/r/YnwD14zxYjUJPc2w@cmpxchg.org
[hannes@cmpxchg.org: opt out of cgroups v1]
  Link: https://lkml.kernel.org/r/Yn6it9mBYFA+/lTb@cmpxchg.org
Link: https://lkml.kernel.org/r/20220510152847.230957-7-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
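The two new knobs are ordinary cgroup2 control files that take and report byte values. As an illustration only (not part of this patch), a small userspace program could set a limit and read back current usage; the cgroup path /sys/fs/cgroup/workload, the 64 MiB value, and the write_file() helper are assumptions made up for this sketch:

#include <stdio.h>

/*
 * Hypothetical cgroup path for the example; adjust to an existing
 * cgroup2 directory on the test system.
 */
#define CGROUP "/sys/fs/cgroup/workload"

static int write_file(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	char buf[64];
	FILE *f;

	/* Cap this cgroup's zswap backend memory at 64 MiB (value in bytes). */
	if (write_file(CGROUP "/memory.zswap.max", "67108864"))
		perror("memory.zswap.max");

	/* memory.zswap.current reports the charged backend memory in bytes. */
	f = fopen(CGROUP "/memory.zswap.current", "r");
	if (f && fgets(buf, sizeof(buf), f))
		printf("zswap backend usage: %s", buf);
	if (f)
		fclose(f);
	return 0;
}

Writing "max" instead of a byte count removes the limit again.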
Diffstat (limited to 'mm/zswap.c')
-rw-r--r--	mm/zswap.c	37
1 file changed, 30 insertions, 7 deletions
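The store-side hunks below implement the ordering described in the changelog: check the cgroup's limit against current usage first, compress, then charge the compressed size. A rough, hedged userspace model of that ordering follows; zswap_usage, zswap_max, compress_page() and try_store_page() are invented names for this sketch and are not symbols from the patch:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

static size_t zswap_usage;              /* bytes currently charged */
static size_t zswap_max = 1 << 20;      /* stand-in per-cgroup limit: 1 MiB */

/* Stand-in for the compressor: pretend everything compresses 2:1. */
static size_t compress_page(const void *page, size_t len)
{
	(void)page;
	return len / 2;
}

static bool try_store_page(const void *page, size_t len)
{
	size_t clen;

	/* 1. Check the limit against *current* usage, before compressing. */
	if (zswap_usage >= zswap_max)
		return false;           /* reject; the kernel falls back to swap */

	/* 2. Compress: only now is the amount to charge known. */
	clen = compress_page(page, len);

	/* 3. Charge the compressed size; a small overrun past the limit is possible. */
	zswap_usage += clen;
	return true;
}

int main(void)
{
	char page[4096] = { 0 };
	int stored = 0;

	while (try_store_page(page, sizeof(page)))
		stored++;

	printf("stored %d pages, usage %zu of %zu bytes\n",
	       stored, zswap_usage, zswap_max);
	return 0;
}

Because the charge happens only after compression, usage can end up slightly above the limit; as the changelog notes, that overrun is not enough to matter in practice.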
diff --git a/mm/zswap.c b/mm/zswap.c
index e3c16a70f533..104835b379ec 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -188,6 +188,7 @@ struct zswap_entry {
 		unsigned long handle;
 		unsigned long value;
 	};
+	struct obj_cgroup *objcg;
 };
 
 struct zswap_header {
@@ -359,6 +360,10 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
  */
 static void zswap_free_entry(struct zswap_entry *entry)
 {
+	if (entry->objcg) {
+		obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
+		obj_cgroup_put(entry->objcg);
+	}
 	if (!entry->length)
 		atomic_dec(&zswap_same_filled_pages);
 	else {
@@ -1096,6 +1101,8 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 	struct zswap_entry *entry, *dupentry;
 	struct scatterlist input, output;
 	struct crypto_acomp_ctx *acomp_ctx;
+	struct obj_cgroup *objcg = NULL;
+	struct zswap_pool *pool;
 	int ret;
 	unsigned int hlen, dlen = PAGE_SIZE;
 	unsigned long handle, value;
@@ -1115,17 +1122,15 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 		goto reject;
 	}
 
+	objcg = get_obj_cgroup_from_page(page);
+	if (objcg && !obj_cgroup_may_zswap(objcg))
+		goto shrink;
+
 	/* reclaim space if needed */
 	if (zswap_is_full()) {
-		struct zswap_pool *pool;
-
 		zswap_pool_limit_hit++;
 		zswap_pool_reached_full = true;
-		pool = zswap_pool_last_get();
-		if (pool)
-			queue_work(shrink_wq, &pool->shrink_work);
-		ret = -ENOMEM;
-		goto reject;
+		goto shrink;
 	}
 
 	if (zswap_pool_reached_full) {
@@ -1227,6 +1232,13 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 		entry->length = dlen;
 
 insert_entry:
+	entry->objcg = objcg;
+	if (objcg) {
+		obj_cgroup_charge_zswap(objcg, entry->length);
+		/* Account before objcg ref is moved to tree */
+		count_objcg_event(objcg, ZSWPOUT);
+	}
+
 	/* map */
 	spin_lock(&tree->lock);
 	do {
@@ -1253,7 +1265,16 @@ put_dstmem:
 freepage:
 	zswap_entry_cache_free(entry);
 reject:
+	if (objcg)
+		obj_cgroup_put(objcg);
 	return ret;
+
+shrink:
+	pool = zswap_pool_last_get();
+	if (pool)
+		queue_work(shrink_wq, &pool->shrink_work);
+	ret = -ENOMEM;
+	goto reject;
 }
 
 /*
@@ -1326,6 +1347,8 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
 	BUG_ON(ret);
 
 stats:
 	count_vm_event(ZSWPIN);
+	if (entry->objcg)
+		count_objcg_event(entry->objcg, ZSWPIN);
 freeentry:
 	spin_lock(&tree->lock);
 	zswap_entry_put(tree, entry);