diff options
Diffstat (limited to 'drivers/block/zram/zram_drv.c')
-rw-r--r-- | drivers/block/zram/zram_drv.c | 382 |
1 files changed, 137 insertions, 245 deletions
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index d8217529be6f..a84c4268257a 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -54,9 +54,8 @@ static size_t huge_class_size; static const struct block_device_operations zram_devops; static void zram_free_page(struct zram *zram, size_t index); -static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, - u32 index, int offset, struct bio *bio); - +static int zram_read_page(struct zram *zram, struct page *page, u32 index, + struct bio *parent); static int zram_slot_trylock(struct zram *zram, u32 index) { @@ -148,6 +147,7 @@ static inline bool is_partial_io(struct bio_vec *bvec) { return bvec->bv_len != PAGE_SIZE; } +#define ZRAM_PARTIAL_IO 1 #else static inline bool is_partial_io(struct bio_vec *bvec) { @@ -174,36 +174,6 @@ static inline u32 zram_get_priority(struct zram *zram, u32 index) return prio & ZRAM_COMP_PRIORITY_MASK; } -/* - * Check if request is within bounds and aligned on zram logical blocks. - */ -static inline bool valid_io_request(struct zram *zram, - sector_t start, unsigned int size) -{ - u64 end, bound; - - /* unaligned request */ - if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) - return false; - if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) - return false; - - end = start + (size >> SECTOR_SHIFT); - bound = zram->disksize >> SECTOR_SHIFT; - /* out of range */ - if (unlikely(start >= bound || end > bound || start > end)) - return false; - - /* I/O request is valid */ - return true; -} - -static void update_position(u32 *index, int *offset, struct bio_vec *bvec) -{ - *index += (*offset + bvec->bv_len) / PAGE_SIZE; - *offset = (*offset + bvec->bv_len) % PAGE_SIZE; -} - static inline void update_used_max(struct zram *zram, const unsigned long pages) { @@ -606,41 +576,16 @@ static void free_block_bdev(struct zram *zram, unsigned long blk_idx) atomic64_dec(&zram->stats.bd_count); } -static void zram_page_end_io(struct bio *bio) -{ - struct page *page = bio_first_page_all(bio); - - page_endio(page, op_is_write(bio_op(bio)), - blk_status_to_errno(bio->bi_status)); - bio_put(bio); -} - -/* - * Returns 1 if the submission is successful. - */ -static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec, +static void read_from_bdev_async(struct zram *zram, struct page *page, unsigned long entry, struct bio *parent) { struct bio *bio; - bio = bio_alloc(zram->bdev, 1, parent ? parent->bi_opf : REQ_OP_READ, - GFP_NOIO); - if (!bio) - return -ENOMEM; - + bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO); bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); - if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) { - bio_put(bio); - return -EIO; - } - - if (!parent) - bio->bi_end_io = zram_page_end_io; - else - bio_chain(bio, parent); - + __bio_add_page(bio, page, PAGE_SIZE, 0); + bio_chain(bio, parent); submit_bio(bio); - return 1; } #define PAGE_WB_SIG "page_index=" @@ -701,10 +646,6 @@ static ssize_t writeback_store(struct device *dev, } for (; nr_pages != 0; index++, nr_pages--) { - struct bio_vec bvec; - - bvec_set_page(&bvec, page, PAGE_SIZE, 0); - spin_lock(&zram->wb_limit_lock); if (zram->wb_limit_enable && !zram->bd_wb_limit) { spin_unlock(&zram->wb_limit_lock); @@ -748,7 +689,7 @@ static ssize_t writeback_store(struct device *dev, /* Need for hugepage writeback racing */ zram_set_flag(zram, index, ZRAM_IDLE); zram_slot_unlock(zram, index); - if (zram_bvec_read(zram, &bvec, index, 0, NULL)) { + if (zram_read_page(zram, page, index, NULL)) { zram_slot_lock(zram, index); zram_clear_flag(zram, index, ZRAM_UNDER_WB); zram_clear_flag(zram, index, ZRAM_IDLE); @@ -759,9 +700,8 @@ static ssize_t writeback_store(struct device *dev, bio_init(&bio, zram->bdev, &bio_vec, 1, REQ_OP_WRITE | REQ_SYNC); bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9); + bio_add_page(&bio, page, PAGE_SIZE, 0); - bio_add_page(&bio, bvec.bv_page, bvec.bv_len, - bvec.bv_offset); /* * XXX: A single page IO would be inefficient for write * but it would be not bad as starter. @@ -829,19 +769,20 @@ struct zram_work { struct work_struct work; struct zram *zram; unsigned long entry; - struct bio *bio; - struct bio_vec bvec; + struct page *page; + int error; }; -#if PAGE_SIZE != 4096 static void zram_sync_read(struct work_struct *work) { struct zram_work *zw = container_of(work, struct zram_work, work); - struct zram *zram = zw->zram; - unsigned long entry = zw->entry; - struct bio *bio = zw->bio; + struct bio_vec bv; + struct bio bio; - read_from_bdev_async(zram, &zw->bvec, entry, bio); + bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ); + bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9); + __bio_add_page(&bio, zw->page, PAGE_SIZE, 0); + zw->error = submit_bio_wait(&bio); } /* @@ -849,45 +790,39 @@ static void zram_sync_read(struct work_struct *work) * chained IO with parent IO in same context, it's a deadlock. To avoid that, * use a worker thread context. */ -static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, - unsigned long entry, struct bio *bio) +static int read_from_bdev_sync(struct zram *zram, struct page *page, + unsigned long entry) { struct zram_work work; - work.bvec = *bvec; + work.page = page; work.zram = zram; work.entry = entry; - work.bio = bio; INIT_WORK_ONSTACK(&work.work, zram_sync_read); queue_work(system_unbound_wq, &work.work); flush_work(&work.work); destroy_work_on_stack(&work.work); - return 1; -} -#else -static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, - unsigned long entry, struct bio *bio) -{ - WARN_ON(1); - return -EIO; + return work.error; } -#endif -static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, - unsigned long entry, struct bio *parent, bool sync) +static int read_from_bdev(struct zram *zram, struct page *page, + unsigned long entry, struct bio *parent) { atomic64_inc(&zram->stats.bd_reads); - if (sync) - return read_from_bdev_sync(zram, bvec, entry, parent); - else - return read_from_bdev_async(zram, bvec, entry, parent); + if (!parent) { + if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO))) + return -EIO; + return read_from_bdev_sync(zram, page, entry); + } + read_from_bdev_async(zram, page, entry, parent); + return 0; } #else static inline void reset_bdev(struct zram *zram) {}; -static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, - unsigned long entry, struct bio *parent, bool sync) +static int read_from_bdev(struct zram *zram, struct page *page, + unsigned long entry, struct bio *parent) { return -EIO; } @@ -1190,10 +1125,9 @@ static ssize_t io_stat_show(struct device *dev, down_read(&zram->init_lock); ret = scnprintf(buf, PAGE_SIZE, - "%8llu %8llu %8llu %8llu\n", + "%8llu %8llu 0 %8llu\n", (u64)atomic64_read(&zram->stats.failed_reads), (u64)atomic64_read(&zram->stats.failed_writes), - (u64)atomic64_read(&zram->stats.invalid_io), (u64)atomic64_read(&zram->stats.notify_free)); up_read(&zram->init_lock); @@ -1372,20 +1306,6 @@ out: } /* - * Reads a page from the writeback devices. Corresponding ZRAM slot - * should be unlocked. - */ -static int zram_bvec_read_from_bdev(struct zram *zram, struct page *page, - u32 index, struct bio *bio, bool partial_io) -{ - struct bio_vec bvec; - - bvec_set_page(&bvec, page, PAGE_SIZE, 0); - return read_from_bdev(zram, &bvec, zram_get_element(zram, index), bio, - partial_io); -} - -/* * Reads (decompresses if needed) a page from zspool (zsmalloc). * Corresponding ZRAM slot should be locked. */ @@ -1434,8 +1354,8 @@ static int zram_read_from_zspool(struct zram *zram, struct page *page, return ret; } -static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, - struct bio *bio, bool partial_io) +static int zram_read_page(struct zram *zram, struct page *page, u32 index, + struct bio *parent) { int ret; @@ -1445,11 +1365,14 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, ret = zram_read_from_zspool(zram, page, index); zram_slot_unlock(zram, index); } else { - /* Slot should be unlocked before the function call */ + /* + * The slot should be unlocked before reading from the backing + * device. + */ zram_slot_unlock(zram, index); - ret = zram_bvec_read_from_bdev(zram, page, index, bio, - partial_io); + ret = read_from_bdev(zram, page, zram_get_element(zram, index), + parent); } /* Should NEVER happen. Return bio error if it does. */ @@ -1459,39 +1382,34 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, return ret; } -static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, - u32 index, int offset, struct bio *bio) +/* + * Use a temporary buffer to decompress the page, as the decompressor + * always expects a full page for the output. + */ +static int zram_bvec_read_partial(struct zram *zram, struct bio_vec *bvec, + u32 index, int offset) { + struct page *page = alloc_page(GFP_NOIO); int ret; - struct page *page; - page = bvec->bv_page; - if (is_partial_io(bvec)) { - /* Use a temporary buffer to decompress the page */ - page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); - if (!page) - return -ENOMEM; - } - - ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec)); - if (unlikely(ret)) - goto out; - - if (is_partial_io(bvec)) { - void *src = kmap_atomic(page); + if (!page) + return -ENOMEM; + ret = zram_read_page(zram, page, index, NULL); + if (likely(!ret)) + memcpy_to_bvec(bvec, page_address(page) + offset); + __free_page(page); + return ret; +} - memcpy_to_bvec(bvec, src + offset); - kunmap_atomic(src); - } -out: +static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, + u32 index, int offset, struct bio *bio) +{ if (is_partial_io(bvec)) - __free_page(page); - - return ret; + return zram_bvec_read_partial(zram, bvec, index, offset); + return zram_read_page(zram, bvec->bv_page, index, bio); } -static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, - u32 index, struct bio *bio) +static int zram_write_page(struct zram *zram, struct page *page, u32 index) { int ret = 0; unsigned long alloced_pages; @@ -1499,7 +1417,6 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, unsigned int comp_len = 0; void *src, *dst, *mem; struct zcomp_strm *zstrm; - struct page *page = bvec->bv_page; unsigned long element = 0; enum zram_pageflags flags = 0; @@ -1617,40 +1534,33 @@ out: return ret; } -static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, - u32 index, int offset, struct bio *bio) +/* + * This is a partial IO. Read the full page before writing the changes. + */ +static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec, + u32 index, int offset, struct bio *bio) { + struct page *page = alloc_page(GFP_NOIO); int ret; - struct page *page = NULL; - struct bio_vec vec; - vec = *bvec; - if (is_partial_io(bvec)) { - void *dst; - /* - * This is a partial IO. We need to read the full page - * before to write the changes. - */ - page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); - if (!page) - return -ENOMEM; - - ret = __zram_bvec_read(zram, page, index, bio, true); - if (ret) - goto out; - - dst = kmap_atomic(page); - memcpy_from_bvec(dst + offset, bvec); - kunmap_atomic(dst); + if (!page) + return -ENOMEM; - bvec_set_page(&vec, page, PAGE_SIZE, 0); + ret = zram_read_page(zram, page, index, bio); + if (!ret) { + memcpy_from_bvec(page_address(page) + offset, bvec); + ret = zram_write_page(zram, page, index); } + __free_page(page); + return ret; +} - ret = __zram_bvec_write(zram, &vec, index, bio); -out: +static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, + u32 index, int offset, struct bio *bio) +{ if (is_partial_io(bvec)) - __free_page(page); - return ret; + return zram_bvec_write_partial(zram, bvec, index, offset, bio); + return zram_write_page(zram, bvec->bv_page, index); } #ifdef CONFIG_ZRAM_MULTI_COMP @@ -1761,7 +1671,7 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page, /* * No direct reclaim (slow path) for handle allocation and no - * re-compression attempt (unlike in __zram_bvec_write()) since + * re-compression attempt (unlike in zram_write_bvec()) since * we already have stored that object in zsmalloc. If we cannot * alloc memory for recompressed object then we bail out and * simply keep the old (existing) object in zsmalloc. @@ -1921,15 +1831,12 @@ release_init_lock: } #endif -/* - * zram_bio_discard - handler on discard request - * @index: physical block index in PAGE_SIZE units - * @offset: byte offset within physical block - */ -static void zram_bio_discard(struct zram *zram, u32 index, - int offset, struct bio *bio) +static void zram_bio_discard(struct zram *zram, struct bio *bio) { size_t n = bio->bi_iter.bi_size; + u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; + u32 offset = (bio->bi_iter.bi_sector & (SECTORS_PER_PAGE - 1)) << + SECTOR_SHIFT; /* * zram manages data in physical block size units. Because logical block @@ -1957,80 +1864,58 @@ static void zram_bio_discard(struct zram *zram, u32 index, index++; n -= PAGE_SIZE; } + + bio_endio(bio); } -/* - * Returns errno if it has some problem. Otherwise return 0 or 1. - * Returns 0 if IO request was done synchronously - * Returns 1 if IO request was successfully submitted. - */ -static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, - int offset, enum req_op op, struct bio *bio) +static void zram_bio_read(struct zram *zram, struct bio *bio) { - int ret; - - if (!op_is_write(op)) { - ret = zram_bvec_read(zram, bvec, index, offset, bio); - flush_dcache_page(bvec->bv_page); - } else { - ret = zram_bvec_write(zram, bvec, index, offset, bio); - } + struct bvec_iter iter; + struct bio_vec bv; + unsigned long start_time; - zram_slot_lock(zram, index); - zram_accessed(zram, index); - zram_slot_unlock(zram, index); + start_time = bio_start_io_acct(bio); + bio_for_each_segment(bv, bio, iter) { + u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; + u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) << + SECTOR_SHIFT; - if (unlikely(ret < 0)) { - if (!op_is_write(op)) + if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) { atomic64_inc(&zram->stats.failed_reads); - else - atomic64_inc(&zram->stats.failed_writes); - } + bio->bi_status = BLK_STS_IOERR; + break; + } + flush_dcache_page(bv.bv_page); - return ret; + zram_slot_lock(zram, index); + zram_accessed(zram, index); + zram_slot_unlock(zram, index); + } + bio_end_io_acct(bio, start_time); + bio_endio(bio); } -static void __zram_make_request(struct zram *zram, struct bio *bio) +static void zram_bio_write(struct zram *zram, struct bio *bio) { - int offset; - u32 index; - struct bio_vec bvec; struct bvec_iter iter; + struct bio_vec bv; unsigned long start_time; - index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; - offset = (bio->bi_iter.bi_sector & - (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; - - switch (bio_op(bio)) { - case REQ_OP_DISCARD: - case REQ_OP_WRITE_ZEROES: - zram_bio_discard(zram, index, offset, bio); - bio_endio(bio); - return; - default: - break; - } - start_time = bio_start_io_acct(bio); - bio_for_each_segment(bvec, bio, iter) { - struct bio_vec bv = bvec; - unsigned int unwritten = bvec.bv_len; - - do { - bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset, - unwritten); - if (zram_bvec_rw(zram, &bv, index, offset, - bio_op(bio), bio) < 0) { - bio->bi_status = BLK_STS_IOERR; - break; - } + bio_for_each_segment(bv, bio, iter) { + u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; + u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) << + SECTOR_SHIFT; - bv.bv_offset += bv.bv_len; - unwritten -= bv.bv_len; + if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) { + atomic64_inc(&zram->stats.failed_writes); + bio->bi_status = BLK_STS_IOERR; + break; + } - update_position(&index, &offset, &bv); - } while (unwritten); + zram_slot_lock(zram, index); + zram_accessed(zram, index); + zram_slot_unlock(zram, index); } bio_end_io_acct(bio, start_time); bio_endio(bio); @@ -2043,14 +1928,21 @@ static void zram_submit_bio(struct bio *bio) { struct zram *zram = bio->bi_bdev->bd_disk->private_data; - if (!valid_io_request(zram, bio->bi_iter.bi_sector, - bio->bi_iter.bi_size)) { - atomic64_inc(&zram->stats.invalid_io); - bio_io_error(bio); - return; + switch (bio_op(bio)) { + case REQ_OP_READ: + zram_bio_read(zram, bio); + break; + case REQ_OP_WRITE: + zram_bio_write(zram, bio); + break; + case REQ_OP_DISCARD: + case REQ_OP_WRITE_ZEROES: + zram_bio_discard(zram, bio); + break; + default: + WARN_ON_ONCE(1); + bio_endio(bio); } - - __zram_make_request(zram, bio); } static void zram_slot_free_notify(struct block_device *bdev, |