diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-06-26 12:35:01 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-06-26 12:35:01 -0700 |
commit | 22165fa79814e71e7a5974b3c37a5028ed16c8f9 (patch) | |
tree | 2dd55b3e3d21017410a0f1b1c2c0281fe4bc8e6c | |
parent | a2f54be94f4cbce269bca6188d3fb4310c346fd1 (diff) | |
parent | b5451e456840af027b794afc2c7c84c2a17f569b (diff) | |
download | lwn-22165fa79814e71e7a5974b3c37a5028ed16c8f9.tar.gz lwn-22165fa79814e71e7a5974b3c37a5028ed16c8f9.zip |
Merge tag 'dm-4.2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper fixes from Mike Snitzer:
"Apologies for not pressing this request-based DM partial completion
issue further, it was an oversight on my part. We'll have to get it
fixed up properly and revisit for a future release.
- Revert block and DM core changes that removed request-based DM's
ability to handle partial request completions -- otherwise with the
current SCSI LLDs these changes could lead to silent data
corruption.
- Fix two DM version bumps that were missing from the initial 4.2 DM
pull request (enables userspace lvm2 to know certain changes have
been made)"
* tag 'dm-4.2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
dm cache policy smq: fix "default" version to be 1.4.0
dm: bump the ioctl version to 4.32.0
Revert "block, dm: don't copy bios for request clones"
Revert "dm: do not allocate any mempools for blk-mq request-based DM"
-rw-r--r-- | block/blk-core.c | 94 | ||||
-rw-r--r-- | drivers/md/dm-cache-policy-smq.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-table.c | 29 | ||||
-rw-r--r-- | drivers/md/dm.c | 220 | ||||
-rw-r--r-- | drivers/md/dm.h | 5 | ||||
-rw-r--r-- | include/linux/blk_types.h | 2 | ||||
-rw-r--r-- | include/linux/blkdev.h | 6 | ||||
-rw-r--r-- | include/uapi/linux/dm-ioctl.h | 4 |
8 files changed, 256 insertions, 106 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index 688ae9482cb8..82819e68f58b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -142,7 +142,7 @@ EXPORT_SYMBOL(blk_rq_init); static void req_bio_endio(struct request *rq, struct bio *bio, unsigned int nbytes, int error) { - if (error && !(rq->cmd_flags & REQ_CLONE)) + if (error) clear_bit(BIO_UPTODATE, &bio->bi_flags); else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) error = -EIO; @@ -153,8 +153,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio, bio_advance(bio, nbytes); /* don't actually finish bio if it's part of flush sequence */ - if (bio->bi_iter.bi_size == 0 && - !(rq->cmd_flags & (REQ_FLUSH_SEQ|REQ_CLONE))) + if (bio->bi_iter.bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) bio_endio(bio, error); } @@ -2927,22 +2926,95 @@ int blk_lld_busy(struct request_queue *q) } EXPORT_SYMBOL_GPL(blk_lld_busy); -void blk_rq_prep_clone(struct request *dst, struct request *src) +/** + * blk_rq_unprep_clone - Helper function to free all bios in a cloned request + * @rq: the clone request to be cleaned up + * + * Description: + * Free all bios in @rq for a cloned request. + */ +void blk_rq_unprep_clone(struct request *rq) +{ + struct bio *bio; + + while ((bio = rq->bio) != NULL) { + rq->bio = bio->bi_next; + + bio_put(bio); + } +} +EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); + +/* + * Copy attributes of the original request to the clone request. + * The actual data parts (e.g. ->cmd, ->sense) are not copied. 
+ */ +static void __blk_rq_prep_clone(struct request *dst, struct request *src) { dst->cpu = src->cpu; - dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK); - dst->cmd_flags |= REQ_NOMERGE | REQ_CLONE; + dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; dst->cmd_type = src->cmd_type; dst->__sector = blk_rq_pos(src); dst->__data_len = blk_rq_bytes(src); dst->nr_phys_segments = src->nr_phys_segments; dst->ioprio = src->ioprio; dst->extra_len = src->extra_len; - dst->bio = src->bio; - dst->biotail = src->biotail; - dst->cmd = src->cmd; - dst->cmd_len = src->cmd_len; - dst->sense = src->sense; +} + +/** + * blk_rq_prep_clone - Helper function to setup clone request + * @rq: the request to be setup + * @rq_src: original request to be cloned + * @bs: bio_set that bios for clone are allocated from + * @gfp_mask: memory allocation mask for bio + * @bio_ctr: setup function to be called for each clone bio. + * Returns %0 for success, non %0 for failure. + * @data: private data to be passed to @bio_ctr + * + * Description: + * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq. + * The actual data parts of @rq_src (e.g. ->cmd, ->sense) + * are not copied, and copying such parts is the caller's responsibility. + * Also, pages which the original bios are pointing to are not copied + * and the cloned bios just point same pages. + * So cloned bios must be completed before original bios, which means + * the caller must complete @rq before @rq_src. 
+ */ +int blk_rq_prep_clone(struct request *rq, struct request *rq_src, + struct bio_set *bs, gfp_t gfp_mask, + int (*bio_ctr)(struct bio *, struct bio *, void *), + void *data) +{ + struct bio *bio, *bio_src; + + if (!bs) + bs = fs_bio_set; + + __rq_for_each_bio(bio_src, rq_src) { + bio = bio_clone_fast(bio_src, gfp_mask, bs); + if (!bio) + goto free_and_out; + + if (bio_ctr && bio_ctr(bio, bio_src, data)) + goto free_and_out; + + if (rq->bio) { + rq->biotail->bi_next = bio; + rq->biotail = bio; + } else + rq->bio = rq->biotail = bio; + } + + __blk_rq_prep_clone(rq, rq_src); + + return 0; + +free_and_out: + if (bio) + bio_put(bio); + blk_rq_unprep_clone(rq); + + return -ENOMEM; } EXPORT_SYMBOL_GPL(blk_rq_prep_clone); diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c index 80f02d3330e2..b6f22651dd35 100644 --- a/drivers/md/dm-cache-policy-smq.c +++ b/drivers/md/dm-cache-policy-smq.c @@ -1750,7 +1750,7 @@ static struct dm_cache_policy_type smq_policy_type = { static struct dm_cache_policy_type default_policy_type = { .name = "default", - .version = {1, 0, 0}, + .version = {1, 4, 0}, .hint_size = 4, .owner = THIS_MODULE, .create = smq_create, diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 85e1d39e9a38..16ba55ad7089 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -942,30 +942,23 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * { unsigned type = dm_table_get_type(t); unsigned per_bio_data_size = 0; + struct dm_target *tgt; unsigned i; - switch (type) { - case DM_TYPE_BIO_BASED: - for (i = 0; i < t->num_targets; i++) { - struct dm_target *tgt = t->targets + i; - - per_bio_data_size = max(per_bio_data_size, - tgt->per_bio_data_size); - } - t->mempools = dm_alloc_bio_mempools(t->integrity_supported, - per_bio_data_size); - break; - case DM_TYPE_REQUEST_BASED: - case DM_TYPE_MQ_REQUEST_BASED: - t->mempools = dm_alloc_rq_mempools(md, type); - break; - default: + if 
(unlikely(type == DM_TYPE_NONE)) { DMWARN("no table type is set, can't allocate mempools"); return -EINVAL; } - if (IS_ERR(t->mempools)) - return PTR_ERR(t->mempools); + if (type == DM_TYPE_BIO_BASED) + for (i = 0; i < t->num_targets; i++) { + tgt = t->targets + i; + per_bio_data_size = max(per_bio_data_size, tgt->per_bio_data_size); + } + + t->mempools = dm_alloc_md_mempools(md, type, t->integrity_supported, per_bio_data_size); + if (!t->mempools) + return -ENOMEM; return 0; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 2fe0992c14a7..f331d888e7f5 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -993,6 +993,57 @@ static void clone_endio(struct bio *bio, int error) dec_pending(io, error); } +/* + * Partial completion handling for request-based dm + */ +static void end_clone_bio(struct bio *clone, int error) +{ + struct dm_rq_clone_bio_info *info = + container_of(clone, struct dm_rq_clone_bio_info, clone); + struct dm_rq_target_io *tio = info->tio; + struct bio *bio = info->orig; + unsigned int nr_bytes = info->orig->bi_iter.bi_size; + + bio_put(clone); + + if (tio->error) + /* + * An error has already been detected on the request. + * Once error occurred, just let clone->end_io() handle + * the remainder. + */ + return; + else if (error) { + /* + * Don't notice the error to the upper layer yet. + * The error handling decision is made by the target driver, + * when the request is completed. + */ + tio->error = error; + return; + } + + /* + * I/O for the bio successfully completed. + * Notice the data completion to the upper layer. + */ + + /* + * bios are processed from the head of the list. + * So the completing bio should always be rq->bio. + * If it's not, something wrong is happening. + */ + if (tio->orig->bio != bio) + DMERR("bio completion is going in the middle of the request"); + + /* + * Update the original request. + * Do not use blk_end_request() here, because it may complete + * the original request before the clone, and break the ordering. 
+ */ + blk_update_request(tio->orig, 0, nr_bytes); +} + static struct dm_rq_target_io *tio_from_request(struct request *rq) { return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special); @@ -1050,6 +1101,8 @@ static void free_rq_clone(struct request *clone) struct dm_rq_target_io *tio = clone->end_io_data; struct mapped_device *md = tio->md; + blk_rq_unprep_clone(clone); + if (md->type == DM_TYPE_MQ_REQUEST_BASED) /* stacked on blk-mq queue(s) */ tio->ti->type->release_clone_rq(clone); @@ -1784,13 +1837,39 @@ static void dm_dispatch_clone_request(struct request *clone, struct request *rq) dm_complete_request(rq, r); } -static void setup_clone(struct request *clone, struct request *rq, - struct dm_rq_target_io *tio) +static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, + void *data) +{ + struct dm_rq_target_io *tio = data; + struct dm_rq_clone_bio_info *info = + container_of(bio, struct dm_rq_clone_bio_info, clone); + + info->orig = bio_orig; + info->tio = tio; + bio->bi_end_io = end_clone_bio; + + return 0; +} + +static int setup_clone(struct request *clone, struct request *rq, + struct dm_rq_target_io *tio, gfp_t gfp_mask) { - blk_rq_prep_clone(clone, rq); + int r; + + r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask, + dm_rq_bio_constructor, tio); + if (r) + return r; + + clone->cmd = rq->cmd; + clone->cmd_len = rq->cmd_len; + clone->sense = rq->sense; clone->end_io = end_clone_request; clone->end_io_data = tio; + tio->clone = clone; + + return 0; } static struct request *clone_rq(struct request *rq, struct mapped_device *md, @@ -1811,7 +1890,12 @@ static struct request *clone_rq(struct request *rq, struct mapped_device *md, clone = tio->clone; blk_rq_init(NULL, clone); - setup_clone(clone, rq, tio); + if (setup_clone(clone, rq, tio, gfp_mask)) { + /* -ENOMEM */ + if (alloc_clone) + free_clone_request(md, clone); + return NULL; + } return clone; } @@ -1905,7 +1989,11 @@ static int map_request(struct dm_rq_target_io *tio, struct request 
*rq, } if (r != DM_MAPIO_REMAPPED) return r; - setup_clone(clone, rq, tio); + if (setup_clone(clone, rq, tio, GFP_ATOMIC)) { + /* -ENOMEM */ + ti->type->release_clone_rq(clone); + return DM_MAPIO_REQUEUE; + } } switch (r) { @@ -2349,42 +2437,30 @@ static void free_dev(struct mapped_device *md) kfree(md); } -static unsigned filter_md_type(unsigned type, struct mapped_device *md) -{ - if (type == DM_TYPE_BIO_BASED) - return type; - - return !md->use_blk_mq ? DM_TYPE_REQUEST_BASED : DM_TYPE_MQ_REQUEST_BASED; -} - static void __bind_mempools(struct mapped_device *md, struct dm_table *t) { struct dm_md_mempools *p = dm_table_get_md_mempools(t); - switch (filter_md_type(dm_table_get_type(t), md)) { - case DM_TYPE_BIO_BASED: - if (md->bs && md->io_pool) { + if (md->bs) { + /* The md already has necessary mempools. */ + if (dm_table_get_type(t) == DM_TYPE_BIO_BASED) { /* - * This bio-based md already has necessary mempools. * Reload bioset because front_pad may have changed * because a different table was loaded. */ bioset_free(md->bs); md->bs = p->bs; p->bs = NULL; - goto out; } - break; - case DM_TYPE_REQUEST_BASED: - if (md->rq_pool && md->io_pool) - /* - * This request-based md already has necessary mempools. - */ - goto out; - break; - case DM_TYPE_MQ_REQUEST_BASED: - BUG_ON(p); /* No mempools needed */ - return; + /* + * There's no need to reload with request-based dm + * because the size of front_pad doesn't change. + * Note for future: If you are to reload bioset, + * prep-ed requests in the queue may refer + * to bio from the old bioset, so you must walk + * through the queue to unprep. 
+ */ + goto out; } BUG_ON(!p || md->io_pool || md->rq_pool || md->bs); @@ -2395,6 +2471,7 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t) p->rq_pool = NULL; md->bs = p->bs; p->bs = NULL; + out: /* mempool bind completed, no longer need any mempools in the table */ dm_table_free_md_mempools(t); @@ -2774,6 +2851,14 @@ out_tag_set: return err; } +static unsigned filter_md_type(unsigned type, struct mapped_device *md) +{ + if (type == DM_TYPE_BIO_BASED) + return type; + + return !md->use_blk_mq ? DM_TYPE_REQUEST_BASED : DM_TYPE_MQ_REQUEST_BASED; +} + /* * Setup the DM device's queue based on md's type */ @@ -3486,23 +3571,48 @@ int dm_noflush_suspending(struct dm_target *ti) } EXPORT_SYMBOL_GPL(dm_noflush_suspending); -struct dm_md_mempools *dm_alloc_bio_mempools(unsigned integrity, - unsigned per_bio_data_size) +struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type, + unsigned integrity, unsigned per_bio_data_size) { - struct dm_md_mempools *pools; - unsigned int pool_size = dm_get_reserved_bio_based_ios(); + struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL); + struct kmem_cache *cachep = NULL; + unsigned int pool_size = 0; unsigned int front_pad; - pools = kzalloc(sizeof(*pools), GFP_KERNEL); if (!pools) - return ERR_PTR(-ENOMEM); + return NULL; - front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + - offsetof(struct dm_target_io, clone); + type = filter_md_type(type, md); - pools->io_pool = mempool_create_slab_pool(pool_size, _io_cache); - if (!pools->io_pool) - goto out; + switch (type) { + case DM_TYPE_BIO_BASED: + cachep = _io_cache; + pool_size = dm_get_reserved_bio_based_ios(); + front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); + break; + case DM_TYPE_REQUEST_BASED: + cachep = _rq_tio_cache; + pool_size = dm_get_reserved_rq_based_ios(); + pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache); 
+ if (!pools->rq_pool) + goto out; + /* fall through to setup remaining rq-based pools */ + case DM_TYPE_MQ_REQUEST_BASED: + if (!pool_size) + pool_size = dm_get_reserved_rq_based_ios(); + front_pad = offsetof(struct dm_rq_clone_bio_info, clone); + /* per_bio_data_size is not used. See __bind_mempools(). */ + WARN_ON(per_bio_data_size != 0); + break; + default: + BUG(); + } + + if (cachep) { + pools->io_pool = mempool_create_slab_pool(pool_size, cachep); + if (!pools->io_pool) + goto out; + } pools->bs = bioset_create_nobvec(pool_size, front_pad); if (!pools->bs) @@ -3512,37 +3622,11 @@ struct dm_md_mempools *dm_alloc_bio_mempools(unsigned integrity, goto out; return pools; -out: - dm_free_md_mempools(pools); - return ERR_PTR(-ENOMEM); -} -struct dm_md_mempools *dm_alloc_rq_mempools(struct mapped_device *md, - unsigned type) -{ - unsigned int pool_size; - struct dm_md_mempools *pools; - - if (filter_md_type(type, md) == DM_TYPE_MQ_REQUEST_BASED) - return NULL; /* No mempools needed */ - - pool_size = dm_get_reserved_rq_based_ios(); - pools = kzalloc(sizeof(*pools), GFP_KERNEL); - if (!pools) - return ERR_PTR(-ENOMEM); - - pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache); - if (!pools->rq_pool) - goto out; - - pools->io_pool = mempool_create_slab_pool(pool_size, _rq_tio_cache); - if (!pools->io_pool) - goto out; - - return pools; out: dm_free_md_mempools(pools); - return ERR_PTR(-ENOMEM); + + return NULL; } void dm_free_md_mempools(struct dm_md_mempools *pools) diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 7fff744f0865..4e984993d40a 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -223,9 +223,8 @@ void dm_kcopyd_exit(void); /* * Mempool operations */ -struct dm_md_mempools *dm_alloc_bio_mempools(unsigned integrity, - unsigned per_bio_data_size); -struct dm_md_mempools *dm_alloc_rq_mempools(struct mapped_device *md, unsigned type); +struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type, + unsigned integrity, 
unsigned per_bio_data_size); void dm_free_md_mempools(struct dm_md_mempools *pools); /* diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 6ab9d12d1f17..7303b3405520 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -192,7 +192,6 @@ enum rq_flag_bits { __REQ_HASHED, /* on IO scheduler merge hash */ __REQ_MQ_INFLIGHT, /* track inflight for MQ */ __REQ_NO_TIMEOUT, /* requests may never expire */ - __REQ_CLONE, /* cloned bios */ __REQ_NR_BITS, /* stops here */ }; @@ -247,6 +246,5 @@ enum rq_flag_bits { #define REQ_HASHED (1ULL << __REQ_HASHED) #define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT) #define REQ_NO_TIMEOUT (1ULL << __REQ_NO_TIMEOUT) -#define REQ_CLONE (1ULL << __REQ_CLONE) #endif /* __LINUX_BLK_TYPES_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7f2f54b4587f..d4068c17d0df 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -774,7 +774,11 @@ extern void blk_add_request_payload(struct request *rq, struct page *page, unsigned int len); extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); extern int blk_lld_busy(struct request_queue *q); -extern void blk_rq_prep_clone(struct request *rq, struct request *rq_src); +extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, + struct bio_set *bs, gfp_t gfp_mask, + int (*bio_ctr)(struct bio *, struct bio *, void *), + void *data); +extern void blk_rq_unprep_clone(struct request *rq); extern int blk_insert_cloned_request(struct request_queue *q, struct request *rq); extern void blk_delay_queue(struct request_queue *, unsigned long); diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index eac8c3641f39..061aca3a962d 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -267,9 +267,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 31 +#define 
DM_VERSION_MINOR 32 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2015-3-12)" +#define DM_VERSION_EXTRA "-ioctl (2015-6-26)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ |