summaryrefslogtreecommitdiff
path: root/drivers/md/dm-writecache.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm-writecache.c')
-rw-r--r--drivers/md/dm-writecache.c472
1 files changed, 287 insertions, 185 deletions
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index e21e29e81bbf..18320444fb0a 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -206,6 +206,19 @@ struct dm_writecache {
struct bio_set bio_set;
mempool_t copy_pool;
+
+ struct {
+ unsigned long long reads;
+ unsigned long long read_hits;
+ unsigned long long writes;
+ unsigned long long write_hits_uncommitted;
+ unsigned long long write_hits_committed;
+ unsigned long long writes_around;
+ unsigned long long writes_allocate;
+ unsigned long long writes_blocked_on_freelist;
+ unsigned long long flushes;
+ unsigned long long discards;
+ } stats;
};
#define WB_LIST_INLINE 16
@@ -1157,6 +1170,18 @@ static int process_cleaner_mesg(unsigned argc, char **argv, struct dm_writecache
return 0;
}
+static int process_clear_stats_mesg(unsigned argc, char **argv, struct dm_writecache *wc)
+{
+ if (argc != 1)
+ return -EINVAL;
+
+ wc_lock(wc);
+ memset(&wc->stats, 0, sizeof wc->stats);
+ wc_unlock(wc);
+
+ return 0;
+}
+
static int writecache_message(struct dm_target *ti, unsigned argc, char **argv,
char *result, unsigned maxlen)
{
@@ -1169,6 +1194,8 @@ static int writecache_message(struct dm_target *ti, unsigned argc, char **argv,
r = process_flush_on_suspend_mesg(argc, argv, wc);
else if (!strcasecmp(argv[0], "cleaner"))
r = process_cleaner_mesg(argc, argv, wc);
+ else if (!strcasecmp(argv[0], "clear_stats"))
+ r = process_clear_stats_mesg(argc, argv, wc);
else
DMERR("unrecognised message received: %s", argv[0]);
@@ -1214,14 +1241,13 @@ static void memcpy_flushcache_optimized(void *dest, void *source, size_t size)
static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data)
{
void *buf;
- unsigned long flags;
unsigned size;
int rw = bio_data_dir(bio);
unsigned remaining_size = wc->block_size;
do {
struct bio_vec bv = bio_iter_iovec(bio, bio->bi_iter);
- buf = bvec_kmap_irq(&bv, &flags);
+ buf = bvec_kmap_local(&bv);
size = bv.bv_len;
if (unlikely(size > remaining_size))
size = remaining_size;
@@ -1239,7 +1265,7 @@ static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data
memcpy_flushcache_optimized(data, buf, size);
}
- bvec_kunmap_irq(buf, &flags);
+ kunmap_local(buf);
data = (char *)data + size;
remaining_size -= size;
@@ -1294,216 +1320,278 @@ static void writecache_offload_bio(struct dm_writecache *wc, struct bio *bio)
bio_list_add(&wc->flush_list, bio);
}
-static int writecache_map(struct dm_target *ti, struct bio *bio)
+enum wc_map_op {
+ WC_MAP_SUBMIT,
+ WC_MAP_REMAP,
+ WC_MAP_REMAP_ORIGIN,
+ WC_MAP_RETURN,
+ WC_MAP_ERROR,
+};
+
+static enum wc_map_op writecache_map_remap_origin(struct dm_writecache *wc, struct bio *bio,
+ struct wc_entry *e)
{
- struct wc_entry *e;
- struct dm_writecache *wc = ti->private;
+ if (e) {
+ sector_t next_boundary =
+ read_original_sector(wc, e) - bio->bi_iter.bi_sector;
+ if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT)
+ dm_accept_partial_bio(bio, next_boundary);
+ }
- bio->bi_private = NULL;
+ return WC_MAP_REMAP_ORIGIN;
+}
- wc_lock(wc);
+static enum wc_map_op writecache_map_read(struct dm_writecache *wc, struct bio *bio)
+{
+ enum wc_map_op map_op;
+ struct wc_entry *e;
- if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
- if (writecache_has_error(wc))
- goto unlock_error;
+read_next_block:
+ wc->stats.reads++;
+ e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING);
+ if (e && read_original_sector(wc, e) == bio->bi_iter.bi_sector) {
+ wc->stats.read_hits++;
if (WC_MODE_PMEM(wc)) {
- writecache_flush(wc);
- if (writecache_has_error(wc))
- goto unlock_error;
- if (unlikely(wc->cleaner) || unlikely(wc->metadata_only))
- goto unlock_remap_origin;
- goto unlock_submit;
+ bio_copy_block(wc, bio, memory_data(wc, e));
+ if (bio->bi_iter.bi_size)
+ goto read_next_block;
+ map_op = WC_MAP_SUBMIT;
} else {
- if (dm_bio_get_target_bio_nr(bio))
- goto unlock_remap_origin;
- writecache_offload_bio(wc, bio);
- goto unlock_return;
+ dm_accept_partial_bio(bio, wc->block_size >> SECTOR_SHIFT);
+ bio_set_dev(bio, wc->ssd_dev->bdev);
+ bio->bi_iter.bi_sector = cache_sector(wc, e);
+ if (!writecache_entry_is_committed(wc, e))
+ writecache_wait_for_ios(wc, WRITE);
+ map_op = WC_MAP_REMAP;
}
+ } else {
+ map_op = writecache_map_remap_origin(wc, bio, e);
}
- bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
+ return map_op;
+}
- if (unlikely((((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
- (wc->block_size / 512 - 1)) != 0)) {
- DMERR("I/O is not aligned, sector %llu, size %u, block size %u",
- (unsigned long long)bio->bi_iter.bi_sector,
- bio->bi_iter.bi_size, wc->block_size);
- goto unlock_error;
- }
+static enum wc_map_op writecache_bio_copy_ssd(struct dm_writecache *wc, struct bio *bio,
+ struct wc_entry *e, bool search_used)
+{
+ unsigned bio_size = wc->block_size;
+ sector_t start_cache_sec = cache_sector(wc, e);
+ sector_t current_cache_sec = start_cache_sec + (bio_size >> SECTOR_SHIFT);
- if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
- if (writecache_has_error(wc))
- goto unlock_error;
- if (WC_MODE_PMEM(wc)) {
- writecache_discard(wc, bio->bi_iter.bi_sector, bio_end_sector(bio));
- goto unlock_remap_origin;
+ while (bio_size < bio->bi_iter.bi_size) {
+ if (!search_used) {
+ struct wc_entry *f = writecache_pop_from_freelist(wc, current_cache_sec);
+ if (!f)
+ break;
+ write_original_sector_seq_count(wc, f, bio->bi_iter.bi_sector +
+ (bio_size >> SECTOR_SHIFT), wc->seq_count);
+ writecache_insert_entry(wc, f);
+ wc->uncommitted_blocks++;
} else {
- writecache_offload_bio(wc, bio);
- goto unlock_return;
+ struct wc_entry *f;
+ struct rb_node *next = rb_next(&e->rb_node);
+ if (!next)
+ break;
+ f = container_of(next, struct wc_entry, rb_node);
+ if (f != e + 1)
+ break;
+ if (read_original_sector(wc, f) !=
+ read_original_sector(wc, e) + (wc->block_size >> SECTOR_SHIFT))
+ break;
+ if (unlikely(f->write_in_progress))
+ break;
+ if (writecache_entry_is_committed(wc, f))
+ wc->overwrote_committed = true;
+ e = f;
}
+ bio_size += wc->block_size;
+ current_cache_sec += wc->block_size >> SECTOR_SHIFT;
}
- if (bio_data_dir(bio) == READ) {
-read_next_block:
- e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING);
- if (e && read_original_sector(wc, e) == bio->bi_iter.bi_sector) {
- if (WC_MODE_PMEM(wc)) {
- bio_copy_block(wc, bio, memory_data(wc, e));
- if (bio->bi_iter.bi_size)
- goto read_next_block;
- goto unlock_submit;
- } else {
- dm_accept_partial_bio(bio, wc->block_size >> SECTOR_SHIFT);
- bio_set_dev(bio, wc->ssd_dev->bdev);
- bio->bi_iter.bi_sector = cache_sector(wc, e);
- if (!writecache_entry_is_committed(wc, e))
- writecache_wait_for_ios(wc, WRITE);
- goto unlock_remap;
+ bio_set_dev(bio, wc->ssd_dev->bdev);
+ bio->bi_iter.bi_sector = start_cache_sec;
+ dm_accept_partial_bio(bio, bio_size >> SECTOR_SHIFT);
+
+ if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) {
+ wc->uncommitted_blocks = 0;
+ queue_work(wc->writeback_wq, &wc->flush_work);
+ } else {
+ writecache_schedule_autocommit(wc);
+ }
+
+ return WC_MAP_REMAP;
+}
+
+static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio *bio)
+{
+ struct wc_entry *e;
+
+ do {
+ bool found_entry = false;
+ bool search_used = false;
+ wc->stats.writes++;
+ if (writecache_has_error(wc))
+ return WC_MAP_ERROR;
+ e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 0);
+ if (e) {
+ if (!writecache_entry_is_committed(wc, e)) {
+ wc->stats.write_hits_uncommitted++;
+ search_used = true;
+ goto bio_copy;
}
- } else {
- if (e) {
- sector_t next_boundary =
- read_original_sector(wc, e) - bio->bi_iter.bi_sector;
- if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) {
- dm_accept_partial_bio(bio, next_boundary);
- }
+ wc->stats.write_hits_committed++;
+ if (!WC_MODE_PMEM(wc) && !e->write_in_progress) {
+ wc->overwrote_committed = true;
+ search_used = true;
+ goto bio_copy;
}
- goto unlock_remap_origin;
+ found_entry = true;
+ } else {
+ if (unlikely(wc->cleaner) ||
+ (wc->metadata_only && !(bio->bi_opf & REQ_META)))
+ goto direct_write;
}
- } else {
- do {
- bool found_entry = false;
- bool search_used = false;
- if (writecache_has_error(wc))
- goto unlock_error;
- e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 0);
- if (e) {
- if (!writecache_entry_is_committed(wc, e)) {
- search_used = true;
- goto bio_copy;
- }
- if (!WC_MODE_PMEM(wc) && !e->write_in_progress) {
- wc->overwrote_committed = true;
- search_used = true;
- goto bio_copy;
- }
- found_entry = true;
- } else {
- if (unlikely(wc->cleaner) ||
- (wc->metadata_only && !(bio->bi_opf & REQ_META)))
- goto direct_write;
- }
- e = writecache_pop_from_freelist(wc, (sector_t)-1);
- if (unlikely(!e)) {
- if (!WC_MODE_PMEM(wc) && !found_entry) {
+ e = writecache_pop_from_freelist(wc, (sector_t)-1);
+ if (unlikely(!e)) {
+ if (!WC_MODE_PMEM(wc) && !found_entry) {
direct_write:
- e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING);
- if (e) {
- sector_t next_boundary = read_original_sector(wc, e) - bio->bi_iter.bi_sector;
- BUG_ON(!next_boundary);
- if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) {
- dm_accept_partial_bio(bio, next_boundary);
- }
- }
- goto unlock_remap_origin;
- }
- writecache_wait_on_freelist(wc);
- continue;
+ wc->stats.writes_around++;
+ e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING);
+ return writecache_map_remap_origin(wc, bio, e);
}
- write_original_sector_seq_count(wc, e, bio->bi_iter.bi_sector, wc->seq_count);
- writecache_insert_entry(wc, e);
- wc->uncommitted_blocks++;
+ wc->stats.writes_blocked_on_freelist++;
+ writecache_wait_on_freelist(wc);
+ continue;
+ }
+ write_original_sector_seq_count(wc, e, bio->bi_iter.bi_sector, wc->seq_count);
+ writecache_insert_entry(wc, e);
+ wc->uncommitted_blocks++;
+ wc->stats.writes_allocate++;
bio_copy:
- if (WC_MODE_PMEM(wc)) {
- bio_copy_block(wc, bio, memory_data(wc, e));
- } else {
- unsigned bio_size = wc->block_size;
- sector_t start_cache_sec = cache_sector(wc, e);
- sector_t current_cache_sec = start_cache_sec + (bio_size >> SECTOR_SHIFT);
-
- while (bio_size < bio->bi_iter.bi_size) {
- if (!search_used) {
- struct wc_entry *f = writecache_pop_from_freelist(wc, current_cache_sec);
- if (!f)
- break;
- write_original_sector_seq_count(wc, f, bio->bi_iter.bi_sector +
- (bio_size >> SECTOR_SHIFT), wc->seq_count);
- writecache_insert_entry(wc, f);
- wc->uncommitted_blocks++;
- } else {
- struct wc_entry *f;
- struct rb_node *next = rb_next(&e->rb_node);
- if (!next)
- break;
- f = container_of(next, struct wc_entry, rb_node);
- if (f != e + 1)
- break;
- if (read_original_sector(wc, f) !=
- read_original_sector(wc, e) + (wc->block_size >> SECTOR_SHIFT))
- break;
- if (unlikely(f->write_in_progress))
- break;
- if (writecache_entry_is_committed(wc, f))
- wc->overwrote_committed = true;
- e = f;
- }
- bio_size += wc->block_size;
- current_cache_sec += wc->block_size >> SECTOR_SHIFT;
- }
+ if (WC_MODE_PMEM(wc))
+ bio_copy_block(wc, bio, memory_data(wc, e));
+ else
+ return writecache_bio_copy_ssd(wc, bio, e, search_used);
+ } while (bio->bi_iter.bi_size);
- bio_set_dev(bio, wc->ssd_dev->bdev);
- bio->bi_iter.bi_sector = start_cache_sec;
- dm_accept_partial_bio(bio, bio_size >> SECTOR_SHIFT);
+ if (unlikely(bio->bi_opf & REQ_FUA || wc->uncommitted_blocks >= wc->autocommit_blocks))
+ writecache_flush(wc);
+ else
+ writecache_schedule_autocommit(wc);
- if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) {
- wc->uncommitted_blocks = 0;
- queue_work(wc->writeback_wq, &wc->flush_work);
- } else {
- writecache_schedule_autocommit(wc);
- }
- goto unlock_remap;
- }
- } while (bio->bi_iter.bi_size);
+ return WC_MAP_SUBMIT;
+}
- if (unlikely(bio->bi_opf & REQ_FUA ||
- wc->uncommitted_blocks >= wc->autocommit_blocks))
- writecache_flush(wc);
- else
- writecache_schedule_autocommit(wc);
- goto unlock_submit;
+static enum wc_map_op writecache_map_flush(struct dm_writecache *wc, struct bio *bio)
+{
+ if (writecache_has_error(wc))
+ return WC_MAP_ERROR;
+
+ if (WC_MODE_PMEM(wc)) {
+ wc->stats.flushes++;
+ writecache_flush(wc);
+ if (writecache_has_error(wc))
+ return WC_MAP_ERROR;
+ else if (unlikely(wc->cleaner) || unlikely(wc->metadata_only))
+ return WC_MAP_REMAP_ORIGIN;
+ return WC_MAP_SUBMIT;
}
+ /* SSD: */
+ if (dm_bio_get_target_bio_nr(bio))
+ return WC_MAP_REMAP_ORIGIN;
+ wc->stats.flushes++;
+ writecache_offload_bio(wc, bio);
+ return WC_MAP_RETURN;
+}
-unlock_remap_origin:
- if (likely(wc->pause != 0)) {
- if (bio_op(bio) == REQ_OP_WRITE) {
- dm_iot_io_begin(&wc->iot, 1);
- bio->bi_private = (void *)2;
- }
+static enum wc_map_op writecache_map_discard(struct dm_writecache *wc, struct bio *bio)
+{
+ wc->stats.discards++;
+
+ if (writecache_has_error(wc))
+ return WC_MAP_ERROR;
+
+ if (WC_MODE_PMEM(wc)) {
+ writecache_discard(wc, bio->bi_iter.bi_sector, bio_end_sector(bio));
+ return WC_MAP_REMAP_ORIGIN;
}
- bio_set_dev(bio, wc->dev->bdev);
- wc_unlock(wc);
- return DM_MAPIO_REMAPPED;
+ /* SSD: */
+ writecache_offload_bio(wc, bio);
+ return WC_MAP_RETURN;
+}
-unlock_remap:
- /* make sure that writecache_end_io decrements bio_in_progress: */
- bio->bi_private = (void *)1;
- atomic_inc(&wc->bio_in_progress[bio_data_dir(bio)]);
- wc_unlock(wc);
- return DM_MAPIO_REMAPPED;
+static int writecache_map(struct dm_target *ti, struct bio *bio)
+{
+ struct dm_writecache *wc = ti->private;
+ enum wc_map_op map_op;
-unlock_submit:
- wc_unlock(wc);
- bio_endio(bio);
- return DM_MAPIO_SUBMITTED;
+ bio->bi_private = NULL;
-unlock_return:
- wc_unlock(wc);
- return DM_MAPIO_SUBMITTED;
+ wc_lock(wc);
-unlock_error:
- wc_unlock(wc);
- bio_io_error(bio);
- return DM_MAPIO_SUBMITTED;
+ if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
+ map_op = writecache_map_flush(wc, bio);
+ goto done;
+ }
+
+ bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
+
+ if (unlikely((((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
+ (wc->block_size / 512 - 1)) != 0)) {
+ DMERR("I/O is not aligned, sector %llu, size %u, block size %u",
+ (unsigned long long)bio->bi_iter.bi_sector,
+ bio->bi_iter.bi_size, wc->block_size);
+ map_op = WC_MAP_ERROR;
+ goto done;
+ }
+
+ if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
+ map_op = writecache_map_discard(wc, bio);
+ goto done;
+ }
+
+ if (bio_data_dir(bio) == READ)
+ map_op = writecache_map_read(wc, bio);
+ else
+ map_op = writecache_map_write(wc, bio);
+done:
+ switch (map_op) {
+ case WC_MAP_REMAP_ORIGIN:
+ if (likely(wc->pause != 0)) {
+ if (bio_op(bio) == REQ_OP_WRITE) {
+ dm_iot_io_begin(&wc->iot, 1);
+ bio->bi_private = (void *)2;
+ }
+ }
+ bio_set_dev(bio, wc->dev->bdev);
+ wc_unlock(wc);
+ return DM_MAPIO_REMAPPED;
+
+ case WC_MAP_REMAP:
+ /* make sure that writecache_end_io decrements bio_in_progress: */
+ bio->bi_private = (void *)1;
+ atomic_inc(&wc->bio_in_progress[bio_data_dir(bio)]);
+ wc_unlock(wc);
+ return DM_MAPIO_REMAPPED;
+
+ case WC_MAP_SUBMIT:
+ wc_unlock(wc);
+ bio_endio(bio);
+ return DM_MAPIO_SUBMITTED;
+
+ case WC_MAP_RETURN:
+ wc_unlock(wc);
+ return DM_MAPIO_SUBMITTED;
+
+ case WC_MAP_ERROR:
+ wc_unlock(wc);
+ bio_io_error(bio);
+ return DM_MAPIO_SUBMITTED;
+
+ default:
+ BUG();
+ return -1;
+ }
}
static int writecache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *status)
@@ -2569,9 +2657,20 @@ static void writecache_status(struct dm_target *ti, status_type_t type,
switch (type) {
case STATUSTYPE_INFO:
- DMEMIT("%ld %llu %llu %llu", writecache_has_error(wc),
+ DMEMIT("%ld %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu",
+ writecache_has_error(wc),
(unsigned long long)wc->n_blocks, (unsigned long long)wc->freelist_size,
- (unsigned long long)wc->writeback_size);
+ (unsigned long long)wc->writeback_size,
+ wc->stats.reads,
+ wc->stats.read_hits,
+ wc->stats.writes,
+ wc->stats.write_hits_uncommitted,
+ wc->stats.write_hits_committed,
+ wc->stats.writes_around,
+ wc->stats.writes_allocate,
+ wc->stats.writes_blocked_on_freelist,
+ wc->stats.flushes,
+ wc->stats.discards);
break;
case STATUSTYPE_TABLE:
DMEMIT("%c %s %s %u ", WC_MODE_PMEM(wc) ? 'p' : 's',
@@ -2624,12 +2723,15 @@ static void writecache_status(struct dm_target *ti, status_type_t type,
if (wc->pause_set)
DMEMIT(" pause_writeback %u", wc->pause_value);
break;
+ case STATUSTYPE_IMA:
+ *result = '\0';
+ break;
}
}
static struct target_type writecache_target = {
.name = "writecache",
- .version = {1, 5, 0},
+ .version = {1, 6, 0},
.module = THIS_MODULE,
.ctr = writecache_ctr,
.dtr = writecache_dtr,