summaryrefslogtreecommitdiff
path: root/drivers/block
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-12-13 19:29:45 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2022-12-13 19:29:45 -0800
commite2ca6ba6ba0152361aa4fcbf6067db71b2c7a770 (patch)
treef7ed7753a2e66486a4ffe0fbbf98404ec4ba2212 /drivers/block
parent7e68dd7d07a28faa2e6574dd6b9dbd90cdeaae91 (diff)
parentc45bc55a99957b20e4e0333bcd42e12d1833a7f5 (diff)
downloadlwn-e2ca6ba6ba0152361aa4fcbf6067db71b2c7a770.tar.gz
lwn-e2ca6ba6ba0152361aa4fcbf6067db71b2c7a770.zip
Merge tag 'mm-stable-2022-12-13' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: - More userfaultfd work from Peter Xu - Several convert-to-folios series from Sidhartha Kumar and Huang Ying - Some filemap cleanups from Vishal Moola - David Hildenbrand added the ability to selftest anon memory COW handling - Some cpuset simplifications from Liu Shixin - Addition of vmalloc tracing support by Uladzislau Rezki - Some pagecache folioifications and simplifications from Matthew Wilcox - A pagemap cleanup from Kefeng Wang: we have VM_ACCESS_FLAGS, so use it - Miguel Ojeda contributed some cleanups for our use of the __no_sanitize_thread__ gcc keyword. This series should have been in the non-MM tree, my bad - Naoya Horiguchi improved the interaction between memory poisoning and memory section removal for huge pages - DAMON cleanups and tuneups from SeongJae Park - Tony Luck fixed the handling of COW faults against poisoned pages - Peter Xu utilized the PTE marker code for handling swapin errors - Hugh Dickins reworked compound page mapcount handling, simplifying it and making it more efficient - Removal of the autonuma savedwrite infrastructure from Nadav Amit and David Hildenbrand - zram support for multiple compression streams from Sergey Senozhatsky - David Hildenbrand reworked the GUP code's R/O long-term pinning so that drivers no longer need to use the FOLL_FORCE workaround which didn't work very well anyway - Mel Gorman altered the page allocator so that local IRQs can remain enabled during per-cpu page allocations - Vishal Moola removed the try_to_release_page() wrapper - Stefan Roesch added some per-BDI sysfs tunables which are used to prevent network block devices from dirtying excessive amounts of pagecache - David Hildenbrand did some cleanup and repair work on KSM COW breaking - Nhat Pham and Johannes Weiner have implemented writeback in zswap's zsmalloc backend - Brian Foster has fixed a longstanding corner-case oddity in file[map]_write_and_wait_range() - sparse-vmemmap changes for MIPS, LoongArch and 
NIOS2 from Feiyang Chen - Shiyang Ruan has done some work on fsdax, to make its reflink mode work better under xfstests. Better, but still not perfect - Christoph Hellwig has removed the .writepage() method from several filesystems. They only need .writepages() - Yosry Ahmed wrote a series which fixes the memcg reclaim target beancounting - David Hildenbrand has fixed some of our MM selftests for 32-bit machines - Many singleton patches, as usual * tag 'mm-stable-2022-12-13' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (313 commits) mm/hugetlb: set head flag before setting compound_order in __prep_compound_gigantic_folio mm: mmu_gather: allow more than one batch of delayed rmaps mm: fix typo in struct pglist_data code comment kmsan: fix memcpy tests mm: add cond_resched() in swapin_walk_pmd_entry() mm: do not show fs mm pc for VM_LOCKONFAULT pages selftests/vm: ksm_functional_tests: fixes for 32bit selftests/vm: cow: fix compile warning on 32bit selftests/vm: madv_populate: fix missing MADV_POPULATE_(READ|WRITE) definitions mm/gup_test: fix PIN_LONGTERM_TEST_READ with highmem mm,thp,rmap: fix races between updates of subpages_mapcount mm: memcg: fix swapcached stat accounting mm: add nodes= arg to memory.reclaim mm: disable top-tier fallback to reclaim on proactive reclaim selftests: cgroup: make sure reclaim target memcg is unprotected selftests: cgroup: refactor proactive reclaim code to reclaim_until() mm: memcg: fix stale protection of reclaim target memcg mm/mmap: properly unaccount memory on mas_preallocate() failure omfs: remove ->writepage jfs: remove ->writepage ...
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/zram/Kconfig9
-rw-r--r--drivers/block/zram/zcomp.c6
-rw-r--r--drivers/block/zram/zcomp.h2
-rw-r--r--drivers/block/zram/zram_drv.c619
-rw-r--r--drivers/block/zram/zram_drv.h24
5 files changed, 571 insertions, 89 deletions
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig
index d4100b0c083e..0386b7da02aa 100644
--- a/drivers/block/zram/Kconfig
+++ b/drivers/block/zram/Kconfig
@@ -78,3 +78,12 @@ config ZRAM_MEMORY_TRACKING
/sys/kernel/debug/zram/zramX/block_state.
See Documentation/admin-guide/blockdev/zram.rst for more information.
+
+config ZRAM_MULTI_COMP
+ bool "Enable multiple compression streams"
+ depends on ZRAM
+ help
+ This will enable multi-compression streams, so that ZRAM can
+ re-compress pages using a potentially slower but more effective
+ compression algorithm. Note, that IDLE page recompression
+ requires ZRAM_MEMORY_TRACKING.
diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
index 0916de952e09..55af4efd7983 100644
--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c
@@ -206,7 +206,7 @@ void zcomp_destroy(struct zcomp *comp)
* case of allocation error, or any other error potentially
* returned by zcomp_init().
*/
-struct zcomp *zcomp_create(const char *compress)
+struct zcomp *zcomp_create(const char *alg)
{
struct zcomp *comp;
int error;
@@ -216,14 +216,14 @@ struct zcomp *zcomp_create(const char *compress)
* is not loaded yet. We must do it here, otherwise we are about to
* call /sbin/modprobe under CPU hot-plug lock.
*/
- if (!zcomp_available_algorithm(compress))
+ if (!zcomp_available_algorithm(alg))
return ERR_PTR(-EINVAL);
comp = kzalloc(sizeof(struct zcomp), GFP_KERNEL);
if (!comp)
return ERR_PTR(-ENOMEM);
- comp->name = compress;
+ comp->name = alg;
error = zcomp_init(comp);
if (error) {
kfree(comp);
diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h
index 40f6420f4b2e..cdefdef93da8 100644
--- a/drivers/block/zram/zcomp.h
+++ b/drivers/block/zram/zcomp.h
@@ -27,7 +27,7 @@ int zcomp_cpu_dead(unsigned int cpu, struct hlist_node *node);
ssize_t zcomp_available_show(const char *comp, char *buf);
bool zcomp_available_algorithm(const char *comp);
-struct zcomp *zcomp_create(const char *comp);
+struct zcomp *zcomp_create(const char *alg);
void zcomp_destroy(struct zcomp *comp);
struct zcomp_strm *zcomp_stream_get(struct zcomp *comp);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 966aab902d19..e290d6d97047 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -155,6 +155,25 @@ static inline bool is_partial_io(struct bio_vec *bvec)
}
#endif
+static inline void zram_set_priority(struct zram *zram, u32 index, u32 prio)
+{
+ prio &= ZRAM_COMP_PRIORITY_MASK;
+ /*
+ * Clear previous priority value first, in case if we recompress
+ * further an already recompressed page
+ */
+ zram->table[index].flags &= ~(ZRAM_COMP_PRIORITY_MASK <<
+ ZRAM_COMP_PRIORITY_BIT1);
+ zram->table[index].flags |= (prio << ZRAM_COMP_PRIORITY_BIT1);
+}
+
+static inline u32 zram_get_priority(struct zram *zram, u32 index)
+{
+ u32 prio = zram->table[index].flags >> ZRAM_COMP_PRIORITY_BIT1;
+
+ return prio & ZRAM_COMP_PRIORITY_MASK;
+}
+
/*
* Check if request is within bounds and aligned on zram logical blocks.
*/
@@ -188,16 +207,13 @@ static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
static inline void update_used_max(struct zram *zram,
const unsigned long pages)
{
- unsigned long old_max, cur_max;
-
- old_max = atomic_long_read(&zram->stats.max_used_pages);
+ unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages);
do {
- cur_max = old_max;
- if (pages > cur_max)
- old_max = atomic_long_cmpxchg(
- &zram->stats.max_used_pages, cur_max, pages);
- } while (old_max != cur_max);
+ if (cur_max >= pages)
+ return;
+ } while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages,
+ &cur_max, pages));
}
static inline void zram_fill_page(void *ptr, unsigned long len,
@@ -629,10 +645,10 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
#define PAGE_WB_SIG "page_index="
-#define PAGE_WRITEBACK 0
-#define HUGE_WRITEBACK (1<<0)
-#define IDLE_WRITEBACK (1<<1)
-
+#define PAGE_WRITEBACK 0
+#define HUGE_WRITEBACK (1<<0)
+#define IDLE_WRITEBACK (1<<1)
+#define INCOMPRESSIBLE_WRITEBACK (1<<2)
static ssize_t writeback_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
@@ -653,6 +669,8 @@ static ssize_t writeback_store(struct device *dev,
mode = HUGE_WRITEBACK;
else if (sysfs_streq(buf, "huge_idle"))
mode = IDLE_WRITEBACK | HUGE_WRITEBACK;
+ else if (sysfs_streq(buf, "incompressible"))
+ mode = INCOMPRESSIBLE_WRITEBACK;
else {
if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1))
return -EINVAL;
@@ -715,11 +733,15 @@ static ssize_t writeback_store(struct device *dev,
goto next;
if (mode & IDLE_WRITEBACK &&
- !zram_test_flag(zram, index, ZRAM_IDLE))
+ !zram_test_flag(zram, index, ZRAM_IDLE))
goto next;
if (mode & HUGE_WRITEBACK &&
- !zram_test_flag(zram, index, ZRAM_HUGE))
+ !zram_test_flag(zram, index, ZRAM_HUGE))
+ goto next;
+ if (mode & INCOMPRESSIBLE_WRITEBACK &&
+ !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
goto next;
+
/*
* Clearing ZRAM_UNDER_WB is duty of caller.
* IOW, zram_free_page never clear it.
@@ -753,8 +775,12 @@ static ssize_t writeback_store(struct device *dev,
zram_clear_flag(zram, index, ZRAM_IDLE);
zram_slot_unlock(zram, index);
/*
- * Return last IO error unless every IO were
- * not suceeded.
+ * BIO errors are not fatal, we continue and simply
+ * attempt to writeback the remaining objects (pages).
+ * At the same time we need to signal user-space that
+ * some writes (at least one, but also could be all of
+ * them) were not successful and we do so by returning
+ * the most recent BIO error.
*/
ret = err;
continue;
@@ -920,13 +946,16 @@ static ssize_t read_block_state(struct file *file, char __user *buf,
ts = ktime_to_timespec64(zram->table[index].ac_time);
copied = snprintf(kbuf + written, count,
- "%12zd %12lld.%06lu %c%c%c%c\n",
+ "%12zd %12lld.%06lu %c%c%c%c%c%c\n",
index, (s64)ts.tv_sec,
ts.tv_nsec / NSEC_PER_USEC,
zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
- zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.');
+ zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.',
+ zram_get_priority(zram, index) ? 'r' : '.',
+ zram_test_flag(zram, index,
+ ZRAM_INCOMPRESSIBLE) ? 'n' : '.');
if (count <= copied) {
zram_slot_unlock(zram, index);
@@ -1000,46 +1029,143 @@ static ssize_t max_comp_streams_store(struct device *dev,
return len;
}
-static ssize_t comp_algorithm_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg)
{
- size_t sz;
- struct zram *zram = dev_to_zram(dev);
+ /* Do not free statically defined compression algorithms */
+ if (zram->comp_algs[prio] != default_compressor)
+ kfree(zram->comp_algs[prio]);
+
+ zram->comp_algs[prio] = alg;
+}
+
+static ssize_t __comp_algorithm_show(struct zram *zram, u32 prio, char *buf)
+{
+ ssize_t sz;
down_read(&zram->init_lock);
- sz = zcomp_available_show(zram->compressor, buf);
+ sz = zcomp_available_show(zram->comp_algs[prio], buf);
up_read(&zram->init_lock);
return sz;
}
-static ssize_t comp_algorithm_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
+static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf)
{
- struct zram *zram = dev_to_zram(dev);
- char compressor[ARRAY_SIZE(zram->compressor)];
+ char *compressor;
size_t sz;
- strscpy(compressor, buf, sizeof(compressor));
+ sz = strlen(buf);
+ if (sz >= CRYPTO_MAX_ALG_NAME)
+ return -E2BIG;
+
+ compressor = kstrdup(buf, GFP_KERNEL);
+ if (!compressor)
+ return -ENOMEM;
+
/* ignore trailing newline */
- sz = strlen(compressor);
if (sz > 0 && compressor[sz - 1] == '\n')
compressor[sz - 1] = 0x00;
- if (!zcomp_available_algorithm(compressor))
+ if (!zcomp_available_algorithm(compressor)) {
+ kfree(compressor);
return -EINVAL;
+ }
down_write(&zram->init_lock);
if (init_done(zram)) {
up_write(&zram->init_lock);
+ kfree(compressor);
pr_info("Can't change algorithm for initialized device\n");
return -EBUSY;
}
- strcpy(zram->compressor, compressor);
+ comp_algorithm_set(zram, prio, compressor);
up_write(&zram->init_lock);
- return len;
+ return 0;
+}
+
+static ssize_t comp_algorithm_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct zram *zram = dev_to_zram(dev);
+
+ return __comp_algorithm_show(zram, ZRAM_PRIMARY_COMP, buf);
+}
+
+static ssize_t comp_algorithm_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t len)
+{
+ struct zram *zram = dev_to_zram(dev);
+ int ret;
+
+ ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_COMP, buf);
+ return ret ? ret : len;
+}
+
+#ifdef CONFIG_ZRAM_MULTI_COMP
+static ssize_t recomp_algorithm_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct zram *zram = dev_to_zram(dev);
+ ssize_t sz = 0;
+ u32 prio;
+
+ for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
+ if (!zram->comp_algs[prio])
+ continue;
+
+ sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, "#%d: ", prio);
+ sz += __comp_algorithm_show(zram, prio, buf + sz);
+ }
+
+ return sz;
+}
+
+static ssize_t recomp_algorithm_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t len)
+{
+ struct zram *zram = dev_to_zram(dev);
+ int prio = ZRAM_SECONDARY_COMP;
+ char *args, *param, *val;
+ char *alg = NULL;
+ int ret;
+
+ args = skip_spaces(buf);
+ while (*args) {
+ args = next_arg(args, &param, &val);
+
+ if (!*val)
+ return -EINVAL;
+
+ if (!strcmp(param, "algo")) {
+ alg = val;
+ continue;
+ }
+
+ if (!strcmp(param, "priority")) {
+ ret = kstrtoint(val, 10, &prio);
+ if (ret)
+ return ret;
+ continue;
+ }
+ }
+
+ if (!alg)
+ return -EINVAL;
+
+ if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS)
+ return -EINVAL;
+
+ ret = __comp_algorithm_store(zram, prio, alg);
+ return ret ? ret : len;
}
+#endif
static ssize_t compact_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
@@ -1210,6 +1336,11 @@ static void zram_free_page(struct zram *zram, size_t index)
atomic64_dec(&zram->stats.huge_pages);
}
+ if (zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
+ zram_clear_flag(zram, index, ZRAM_INCOMPRESSIBLE);
+
+ zram_set_priority(zram, index, 0);
+
if (zram_test_flag(zram, index, ZRAM_WB)) {
zram_clear_flag(zram, index, ZRAM_WB);
free_block_bdev(zram, zram_get_element(zram, index));
@@ -1242,32 +1373,37 @@ out:
~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
}
-static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
- struct bio *bio, bool partial_io)
+/*
+ * Reads a page from the writeback devices. Corresponding ZRAM slot
+ * should be unlocked.
+ */
+static int zram_bvec_read_from_bdev(struct zram *zram, struct page *page,
+ u32 index, struct bio *bio, bool partial_io)
+{
+ struct bio_vec bvec = {
+ .bv_page = page,
+ .bv_len = PAGE_SIZE,
+ .bv_offset = 0,
+ };
+
+ return read_from_bdev(zram, &bvec, zram_get_element(zram, index), bio,
+ partial_io);
+}
+
+/*
+ * Reads (decompresses if needed) a page from zspool (zsmalloc).
+ * Corresponding ZRAM slot should be locked.
+ */
+static int zram_read_from_zspool(struct zram *zram, struct page *page,
+ u32 index)
{
struct zcomp_strm *zstrm;
unsigned long handle;
unsigned int size;
void *src, *dst;
+ u32 prio;
int ret;
- zram_slot_lock(zram, index);
- if (zram_test_flag(zram, index, ZRAM_WB)) {
- struct bio_vec bvec;
-
- zram_slot_unlock(zram, index);
- /* A null bio means rw_page was used, we must fallback to bio */
- if (!bio)
- return -EOPNOTSUPP;
-
- bvec.bv_page = page;
- bvec.bv_len = PAGE_SIZE;
- bvec.bv_offset = 0;
- return read_from_bdev(zram, &bvec,
- zram_get_element(zram, index),
- bio, partial_io);
- }
-
handle = zram_get_handle(zram, index);
if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
unsigned long value;
@@ -1277,14 +1413,15 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
mem = kmap_atomic(page);
zram_fill_page(mem, PAGE_SIZE, value);
kunmap_atomic(mem);
- zram_slot_unlock(zram, index);
return 0;
}
size = zram_get_obj_size(zram, index);
- if (size != PAGE_SIZE)
- zstrm = zcomp_stream_get(zram->comp);
+ if (size != PAGE_SIZE) {
+ prio = zram_get_priority(zram, index);
+ zstrm = zcomp_stream_get(zram->comps[prio]);
+ }
src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
if (size == PAGE_SIZE) {
@@ -1296,20 +1433,43 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
dst = kmap_atomic(page);
ret = zcomp_decompress(zstrm, src, size, dst);
kunmap_atomic(dst);
- zcomp_stream_put(zram->comp);
+ zcomp_stream_put(zram->comps[prio]);
}
zs_unmap_object(zram->mem_pool, handle);
- zram_slot_unlock(zram, index);
+ return ret;
+}
+
+static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
+ struct bio *bio, bool partial_io)
+{
+ int ret;
+
+ zram_slot_lock(zram, index);
+ if (!zram_test_flag(zram, index, ZRAM_WB)) {
+ /* Slot should be locked throughout the function call */
+ ret = zram_read_from_zspool(zram, page, index);
+ zram_slot_unlock(zram, index);
+ } else {
+ /* Slot should be unlocked before the function call */
+ zram_slot_unlock(zram, index);
+
+ /* A null bio means rw_page was used, we must fallback to bio */
+ if (!bio)
+ return -EOPNOTSUPP;
+
+ ret = zram_bvec_read_from_bdev(zram, page, index, bio,
+ partial_io);
+ }
/* Should NEVER happen. Return bio error if it does. */
- if (WARN_ON(ret))
+ if (WARN_ON(ret < 0))
pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
return ret;
}
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
- u32 index, int offset, struct bio *bio)
+ u32 index, int offset, struct bio *bio)
{
int ret;
struct page *page;
@@ -1363,13 +1523,13 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
kunmap_atomic(mem);
compress_again:
- zstrm = zcomp_stream_get(zram->comp);
+ zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
src = kmap_atomic(page);
ret = zcomp_compress(zstrm, src, &comp_len);
kunmap_atomic(src);
if (unlikely(ret)) {
- zcomp_stream_put(zram->comp);
+ zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
pr_err("Compression failed! err=%d\n", ret);
zs_free(zram->mem_pool, handle);
return ret;
@@ -1390,19 +1550,19 @@ compress_again:
* if we have a 'non-null' handle here then we are coming
* from the slow path and handle has already been allocated.
*/
- if (IS_ERR((void *)handle))
+ if (IS_ERR_VALUE(handle))
handle = zs_malloc(zram->mem_pool, comp_len,
__GFP_KSWAPD_RECLAIM |
__GFP_NOWARN |
__GFP_HIGHMEM |
__GFP_MOVABLE);
- if (IS_ERR((void *)handle)) {
- zcomp_stream_put(zram->comp);
+ if (IS_ERR_VALUE(handle)) {
+ zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
atomic64_inc(&zram->stats.writestall);
handle = zs_malloc(zram->mem_pool, comp_len,
GFP_NOIO | __GFP_HIGHMEM |
__GFP_MOVABLE);
- if (IS_ERR((void *)handle))
+ if (IS_ERR_VALUE(handle))
return PTR_ERR((void *)handle);
if (comp_len != PAGE_SIZE)
@@ -1414,14 +1574,14 @@ compress_again:
* zstrm buffer back. It is necessary that the dereferencing
* of the zstrm variable below occurs correctly.
*/
- zstrm = zcomp_stream_get(zram->comp);
+ zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
}
alloced_pages = zs_get_total_pages(zram->mem_pool);
update_used_max(zram, alloced_pages);
if (zram->limit_pages && alloced_pages > zram->limit_pages) {
- zcomp_stream_put(zram->comp);
+ zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
zs_free(zram->mem_pool, handle);
return -ENOMEM;
}
@@ -1435,7 +1595,7 @@ compress_again:
if (comp_len == PAGE_SIZE)
kunmap_atomic(src);
- zcomp_stream_put(zram->comp);
+ zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
zs_unmap_object(zram->mem_pool, handle);
atomic64_add(comp_len, &zram->stats.compr_data_size);
out:
@@ -1504,6 +1664,274 @@ out:
return ret;
}
+#ifdef CONFIG_ZRAM_MULTI_COMP
+/*
+ * This function will decompress (unless it's ZRAM_HUGE) the page and then
+ * attempt to compress it using provided compression algorithm priority
+ * (which is potentially more effective).
+ *
+ * Corresponding ZRAM slot should be locked.
+ */
+static int zram_recompress(struct zram *zram, u32 index, struct page *page,
+ u32 threshold, u32 prio, u32 prio_max)
+{
+ struct zcomp_strm *zstrm = NULL;
+ unsigned long handle_old;
+ unsigned long handle_new;
+ unsigned int comp_len_old;
+ unsigned int comp_len_new;
+ unsigned int class_index_old;
+ unsigned int class_index_new;
+ u32 num_recomps = 0;
+ void *src, *dst;
+ int ret;
+
+ handle_old = zram_get_handle(zram, index);
+ if (!handle_old)
+ return -EINVAL;
+
+ comp_len_old = zram_get_obj_size(zram, index);
+ /*
+ * Do not recompress objects that are already "small enough".
+ */
+ if (comp_len_old < threshold)
+ return 0;
+
+ ret = zram_read_from_zspool(zram, page, index);
+ if (ret)
+ return ret;
+
+ class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old);
+ /*
+ * Iterate the secondary comp algorithms list (in order of priority)
+ * and try to recompress the page.
+ */
+ for (; prio < prio_max; prio++) {
+ if (!zram->comps[prio])
+ continue;
+
+ /*
+ * Skip if the object is already re-compressed with a higher
+ * priority algorithm (or same algorithm).
+ */
+ if (prio <= zram_get_priority(zram, index))
+ continue;
+
+ num_recomps++;
+ zstrm = zcomp_stream_get(zram->comps[prio]);
+ src = kmap_atomic(page);
+ ret = zcomp_compress(zstrm, src, &comp_len_new);
+ kunmap_atomic(src);
+
+ if (ret) {
+ zcomp_stream_put(zram->comps[prio]);
+ return ret;
+ }
+
+ class_index_new = zs_lookup_class_index(zram->mem_pool,
+ comp_len_new);
+
+ /* Continue until we make progress */
+ if (class_index_new >= class_index_old ||
+ (threshold && comp_len_new >= threshold)) {
+ zcomp_stream_put(zram->comps[prio]);
+ continue;
+ }
+
+ /* Recompression was successful so break out */
+ break;
+ }
+
+ /*
+ * We did not try to recompress, e.g. when we have only one
+ * secondary algorithm and the page is already recompressed
+ * using that algorithm
+ */
+ if (!zstrm)
+ return 0;
+
+ if (class_index_new >= class_index_old) {
+ /*
+ * Secondary algorithms failed to re-compress the page
+ * in a way that would save memory, mark the object as
+ * incompressible so that we will not try to compress
+ * it again.
+ *
+ * We need to make sure that all secondary algorithms have
+ * failed, so we test if the number of recompressions matches
+ * the number of active secondary algorithms.
+ */
+ if (num_recomps == zram->num_active_comps - 1)
+ zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE);
+ return 0;
+ }
+
+ /* Successful recompression but above threshold */
+ if (threshold && comp_len_new >= threshold)
+ return 0;
+
+ /*
+ * No direct reclaim (slow path) for handle allocation and no
+ * re-compression attempt (unlike in __zram_bvec_write()) since
+ * we already have stored that object in zsmalloc. If we cannot
+ * alloc memory for recompressed object then we bail out and
+ * simply keep the old (existing) object in zsmalloc.
+ */
+ handle_new = zs_malloc(zram->mem_pool, comp_len_new,
+ __GFP_KSWAPD_RECLAIM |
+ __GFP_NOWARN |
+ __GFP_HIGHMEM |
+ __GFP_MOVABLE);
+ if (IS_ERR_VALUE(handle_new)) {
+ zcomp_stream_put(zram->comps[prio]);
+ return PTR_ERR((void *)handle_new);
+ }
+
+ dst = zs_map_object(zram->mem_pool, handle_new, ZS_MM_WO);
+ memcpy(dst, zstrm->buffer, comp_len_new);
+ zcomp_stream_put(zram->comps[prio]);
+
+ zs_unmap_object(zram->mem_pool, handle_new);
+
+ zram_free_page(zram, index);
+ zram_set_handle(zram, index, handle_new);
+ zram_set_obj_size(zram, index, comp_len_new);
+ zram_set_priority(zram, index, prio);
+
+ atomic64_add(comp_len_new, &zram->stats.compr_data_size);
+ atomic64_inc(&zram->stats.pages_stored);
+
+ return 0;
+}
+
+#define RECOMPRESS_IDLE (1 << 0)
+#define RECOMPRESS_HUGE (1 << 1)
+
+static ssize_t recompress_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ u32 prio = ZRAM_SECONDARY_COMP, prio_max = ZRAM_MAX_COMPS;
+ struct zram *zram = dev_to_zram(dev);
+ unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
+ char *args, *param, *val, *algo = NULL;
+ u32 mode = 0, threshold = 0;
+ unsigned long index;
+ struct page *page;
+ ssize_t ret;
+
+ args = skip_spaces(buf);
+ while (*args) {
+ args = next_arg(args, &param, &val);
+
+ if (!*val)
+ return -EINVAL;
+
+ if (!strcmp(param, "type")) {
+ if (!strcmp(val, "idle"))
+ mode = RECOMPRESS_IDLE;
+ if (!strcmp(val, "huge"))
+ mode = RECOMPRESS_HUGE;
+ if (!strcmp(val, "huge_idle"))
+ mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE;
+ continue;
+ }
+
+ if (!strcmp(param, "threshold")) {
+ /*
+ * We will re-compress only idle objects equal or
+ * greater in size than watermark.
+ */
+ ret = kstrtouint(val, 10, &threshold);
+ if (ret)
+ return ret;
+ continue;
+ }
+
+ if (!strcmp(param, "algo")) {
+ algo = val;
+ continue;
+ }
+ }
+
+ if (threshold >= PAGE_SIZE)
+ return -EINVAL;
+
+ down_read(&zram->init_lock);
+ if (!init_done(zram)) {
+ ret = -EINVAL;
+ goto release_init_lock;
+ }
+
+ if (algo) {
+ bool found = false;
+
+ for (; prio < ZRAM_MAX_COMPS; prio++) {
+ if (!zram->comp_algs[prio])
+ continue;
+
+ if (!strcmp(zram->comp_algs[prio], algo)) {
+ prio_max = min(prio + 1, ZRAM_MAX_COMPS);
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ ret = -EINVAL;
+ goto release_init_lock;
+ }
+ }
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ ret = -ENOMEM;
+ goto release_init_lock;
+ }
+
+ ret = len;
+ for (index = 0; index < nr_pages; index++) {
+ int err = 0;
+
+ zram_slot_lock(zram, index);
+
+ if (!zram_allocated(zram, index))
+ goto next;
+
+ if (mode & RECOMPRESS_IDLE &&
+ !zram_test_flag(zram, index, ZRAM_IDLE))
+ goto next;
+
+ if (mode & RECOMPRESS_HUGE &&
+ !zram_test_flag(zram, index, ZRAM_HUGE))
+ goto next;
+
+ if (zram_test_flag(zram, index, ZRAM_WB) ||
+ zram_test_flag(zram, index, ZRAM_UNDER_WB) ||
+ zram_test_flag(zram, index, ZRAM_SAME) ||
+ zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
+ goto next;
+
+ err = zram_recompress(zram, index, page, threshold,
+ prio, prio_max);
+next:
+ zram_slot_unlock(zram, index);
+ if (err) {
+ ret = err;
+ break;
+ }
+
+ cond_resched();
+ }
+
+ __free_page(page);
+
+release_init_lock:
+ up_read(&zram->init_lock);
+ return ret;
+}
+#endif
+
/*
* zram_bio_discard - handler on discard request
* @index: physical block index in PAGE_SIZE units
@@ -1553,11 +1981,9 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
int ret;
if (!op_is_write(op)) {
- atomic64_inc(&zram->stats.num_reads);
ret = zram_bvec_read(zram, bvec, index, offset, bio);
flush_dcache_page(bvec->bv_page);
} else {
- atomic64_inc(&zram->stats.num_writes);
ret = zram_bvec_write(zram, bvec, index, offset, bio);
}
@@ -1710,6 +2136,21 @@ out:
return ret;
}
+static void zram_destroy_comps(struct zram *zram)
+{
+ u32 prio;
+
+ for (prio = 0; prio < ZRAM_MAX_COMPS; prio++) {
+ struct zcomp *comp = zram->comps[prio];
+
+ zram->comps[prio] = NULL;
+ if (!comp)
+ continue;
+ zcomp_destroy(comp);
+ zram->num_active_comps--;
+ }
+}
+
static void zram_reset_device(struct zram *zram)
{
down_write(&zram->init_lock);
@@ -1727,11 +2168,11 @@ static void zram_reset_device(struct zram *zram)
/* I/O operation under all of CPU are done so let's free */
zram_meta_free(zram, zram->disksize);
zram->disksize = 0;
+ zram_destroy_comps(zram);
memset(&zram->stats, 0, sizeof(zram->stats));
- zcomp_destroy(zram->comp);
- zram->comp = NULL;
reset_bdev(zram);
+ comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
up_write(&zram->init_lock);
}
@@ -1742,6 +2183,7 @@ static ssize_t disksize_store(struct device *dev,
struct zcomp *comp;
struct zram *zram = dev_to_zram(dev);
int err;
+ u32 prio;
disksize = memparse(buf, NULL);
if (!disksize)
@@ -1760,22 +2202,29 @@ static ssize_t disksize_store(struct device *dev,
goto out_unlock;
}
- comp = zcomp_create(zram->compressor);
- if (IS_ERR(comp)) {
- pr_err("Cannot initialise %s compressing backend\n",
- zram->compressor);
- err = PTR_ERR(comp);
- goto out_free_meta;
- }
+ for (prio = 0; prio < ZRAM_MAX_COMPS; prio++) {
+ if (!zram->comp_algs[prio])
+ continue;
+
+ comp = zcomp_create(zram->comp_algs[prio]);
+ if (IS_ERR(comp)) {
+ pr_err("Cannot initialise %s compressing backend\n",
+ zram->comp_algs[prio]);
+ err = PTR_ERR(comp);
+ goto out_free_comps;
+ }
- zram->comp = comp;
+ zram->comps[prio] = comp;
+ zram->num_active_comps++;
+ }
zram->disksize = disksize;
set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
up_write(&zram->init_lock);
return len;
-out_free_meta:
+out_free_comps:
+ zram_destroy_comps(zram);
zram_meta_free(zram, disksize);
out_unlock:
up_write(&zram->init_lock);
@@ -1860,6 +2309,10 @@ static DEVICE_ATTR_WO(writeback);
static DEVICE_ATTR_RW(writeback_limit);
static DEVICE_ATTR_RW(writeback_limit_enable);
#endif
+#ifdef CONFIG_ZRAM_MULTI_COMP
+static DEVICE_ATTR_RW(recomp_algorithm);
+static DEVICE_ATTR_WO(recompress);
+#endif
static struct attribute *zram_disk_attrs[] = {
&dev_attr_disksize.attr,
@@ -1883,6 +2336,10 @@ static struct attribute *zram_disk_attrs[] = {
&dev_attr_bd_stat.attr,
#endif
&dev_attr_debug_stat.attr,
+#ifdef CONFIG_ZRAM_MULTI_COMP
+ &dev_attr_recomp_algorithm.attr,
+ &dev_attr_recompress.attr,
+#endif
NULL,
};
@@ -1962,7 +2419,7 @@ static int zram_add(void)
if (ret)
goto out_cleanup_disk;
- strscpy(zram->compressor, default_compressor, sizeof(zram->compressor));
+ comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
zram_debugfs_register(zram);
pr_info("Added device: %s\n", zram->disk->disk_name);
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index a2bda53020fd..c5254626f051 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -40,6 +40,9 @@
*/
#define ZRAM_FLAG_SHIFT (PAGE_SHIFT + 1)
+/* Only 2 bits are allowed for comp priority index */
+#define ZRAM_COMP_PRIORITY_MASK 0x3
+
/* Flags for zram pages (table[page_no].flags) */
enum zram_pageflags {
/* zram slot is locked */
@@ -49,6 +52,10 @@ enum zram_pageflags {
ZRAM_UNDER_WB, /* page is under writeback */
ZRAM_HUGE, /* Incompressible page */
ZRAM_IDLE, /* not accessed page since last idle marking */
+ ZRAM_INCOMPRESSIBLE, /* none of the algorithms could compress it */
+
+ ZRAM_COMP_PRIORITY_BIT1, /* First bit of comp priority index */
+ ZRAM_COMP_PRIORITY_BIT2, /* Second bit of comp priority index */
__NR_ZRAM_PAGEFLAGS,
};
@@ -69,8 +76,6 @@ struct zram_table_entry {
struct zram_stats {
atomic64_t compr_data_size; /* compressed size of pages stored */
- atomic64_t num_reads; /* failed + successful */
- atomic64_t num_writes; /* --do-- */
atomic64_t failed_reads; /* can happen when memory is too low */
atomic64_t failed_writes; /* can happen when memory is too low */
atomic64_t invalid_io; /* non-page-aligned I/O requests */
@@ -89,10 +94,20 @@ struct zram_stats {
#endif
};
+#ifdef CONFIG_ZRAM_MULTI_COMP
+#define ZRAM_PRIMARY_COMP 0U
+#define ZRAM_SECONDARY_COMP 1U
+#define ZRAM_MAX_COMPS 4U
+#else
+#define ZRAM_PRIMARY_COMP 0U
+#define ZRAM_SECONDARY_COMP 0U
+#define ZRAM_MAX_COMPS 1U
+#endif
+
struct zram {
struct zram_table_entry *table;
struct zs_pool *mem_pool;
- struct zcomp *comp;
+ struct zcomp *comps[ZRAM_MAX_COMPS];
struct gendisk *disk;
/* Prevent concurrent execution of device init */
struct rw_semaphore init_lock;
@@ -107,7 +122,8 @@ struct zram {
* we can store in a disk.
*/
u64 disksize; /* bytes */
- char compressor[CRYPTO_MAX_ALG_NAME];
+ const char *comp_algs[ZRAM_MAX_COMPS];
+ s8 num_active_comps;
/*
* zram is claimed so open request will be failed
*/