diff options
author | Mikulas Patocka <mpatocka@redhat.com> | 2020-01-15 04:35:22 -0500 |
---|---|---|
committer | Mike Snitzer <snitzer@redhat.com> | 2020-01-16 13:34:17 -0500 |
commit | dcd195071f22d4770911ca46694ca398b6d5101d (patch) | |
tree | caa113ebf22d61d9034bc36fee85cb81b6061f69 | |
parent | be240ff5e402df49c4ddbcb0595ef96009239f6a (diff) | |
download | lwn-dcd195071f22d4770911ca46694ca398b6d5101d.tar.gz lwn-dcd195071f22d4770911ca46694ca398b6d5101d.zip |
dm writecache: improve performance of large linear writes on SSDs
When dm-writecache is used with SSD as a cache device, it would submit a
separate bio for each written block. The I/Os would be merged by the disk
scheduler, but this merging degrades performance.
Improve dm-writecache performance by submitting larger bios - this is
possible as long as there is consecutive free space on the cache
device.
Benchmark (arm64 with 64k page size, using /dev/ram0 as a cache device):
fio --bs=512k --iodepth=32 --size=400M --direct=1 \
--filename=/dev/mapper/cache --rw=randwrite --numjobs=1 --name=test
block old new
size MiB/s MiB/s
---------------------
512 181 700
1k 347 1256
2k 644 2020
4k 1183 2759
8k 1852 3333
16k 2469 3509
32k 2974 3670
64k 3404 3810
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
-rw-r--r-- | drivers/md/dm-writecache.c | 29 |
1 files changed, 25 insertions, 4 deletions
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 9b0a3bf6a4a1..b9e27e37a943 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -625,7 +625,7 @@ static void writecache_add_to_freelist(struct dm_writecache *wc, struct wc_entry wc->freelist_size++; } -static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc) +static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc, sector_t expected_sector) { struct wc_entry *e; @@ -634,6 +634,8 @@ static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc) if (unlikely(!wc->current_free)) return NULL; e = wc->current_free; + if (expected_sector != (sector_t)-1 && unlikely(cache_sector(wc, e) != expected_sector)) + return NULL; next = rb_next(&e->rb_node); rb_erase(&e->rb_node, &wc->freetree); if (unlikely(!next)) @@ -643,6 +645,8 @@ static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc) if (unlikely(list_empty(&wc->freelist))) return NULL; e = container_of(wc->freelist.next, struct wc_entry, lru); + if (expected_sector != (sector_t)-1 && unlikely(cache_sector(wc, e) != expected_sector)) + return NULL; list_del(&e->lru); } wc->freelist_size--; @@ -1193,7 +1197,7 @@ read_next_block: goto bio_copy; } } - e = writecache_pop_from_freelist(wc); + e = writecache_pop_from_freelist(wc, (sector_t)-1); if (unlikely(!e)) { writecache_wait_on_freelist(wc); continue; @@ -1205,9 +1209,26 @@ bio_copy: if (WC_MODE_PMEM(wc)) { bio_copy_block(wc, bio, memory_data(wc, e)); } else { - dm_accept_partial_bio(bio, wc->block_size >> SECTOR_SHIFT); + unsigned bio_size = wc->block_size; + sector_t start_cache_sec = cache_sector(wc, e); + sector_t current_cache_sec = start_cache_sec + (bio_size >> SECTOR_SHIFT); + + while (bio_size < bio->bi_iter.bi_size) { + struct wc_entry *f = writecache_pop_from_freelist(wc, current_cache_sec); + if (!f) + break; + write_original_sector_seq_count(wc, f, bio->bi_iter.bi_sector + + (bio_size >> SECTOR_SHIFT), wc->seq_count); + writecache_insert_entry(wc, f); + wc->uncommitted_blocks++; + bio_size += wc->block_size; + current_cache_sec += wc->block_size >> SECTOR_SHIFT; + } + bio_set_dev(bio, wc->ssd_dev->bdev); - bio->bi_iter.bi_sector = cache_sector(wc, e); + bio->bi_iter.bi_sector = start_cache_sec; + dm_accept_partial_bio(bio, bio_size >> SECTOR_SHIFT); + if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) { wc->uncommitted_blocks = 0; queue_work(wc->writeback_wq, &wc->flush_work); |