From 371b2e348b7731a548b0a7432ca2ba32e90dd16f Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 21 Jul 2008 12:00:24 +0100 Subject: dm mpath: free path selector on invalid args Free the path selector if the arguments are invalid. This command (note that it is invalid) causes a reference leak on module "dm_round_robin" and prevents the module from being removed: dmsetup create --table '0 2 multipath 0 0 1 1 round-robin /dev/sdh' mpath0 Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-mpath.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 9f7302d4878d..9f570b2ab7b4 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -525,8 +525,10 @@ static int parse_path_selector(struct arg_set *as, struct priority_group *pg, } r = read_param(_params, shift(as), &ps_argc, &ti->error); - if (r) + if (r) { + dm_put_path_selector(pst); return -EINVAL; + } r = pst->create(&pg->ps, ps_argc, as->argv); if (r) { -- cgit v1.2.3 From c8da2f8dd86d70559ec4e50251f6a755b42bd5b4 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 21 Jul 2008 12:00:27 +0100 Subject: dm log: make dm_dirty_log init and exit static dm_dirty_log_{init,exit}() can now become static. 
Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Alasdair G Kergon --- drivers/md/dm-log.c | 4 ++-- drivers/md/dm.h | 6 ------ 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 67a6f31b7fc3..5b48478c79f5 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -831,7 +831,7 @@ static struct dm_dirty_log_type _disk_type = { .status = disk_status, }; -int __init dm_dirty_log_init(void) +static int __init dm_dirty_log_init(void) { int r; @@ -848,7 +848,7 @@ int __init dm_dirty_log_init(void) return r; } -void __exit dm_dirty_log_exit(void) +static void __exit dm_dirty_log_exit(void) { dm_dirty_log_type_unregister(&_disk_type); dm_dirty_log_type_unregister(&_core_type); diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 8c03b634e62e..1e59a0b0a78a 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -100,12 +100,6 @@ int dm_lock_for_deletion(struct mapped_device *md); void dm_kobject_uevent(struct mapped_device *md); -/* - * Dirty log - */ -int dm_dirty_log_init(void); -void dm_dirty_log_exit(void); - int dm_kcopyd_init(void); void dm_kcopyd_exit(void); -- cgit v1.2.3 From 6ae2fa6718c398290be29ef740873640d25058b6 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Mon, 21 Jul 2008 12:00:28 +0100 Subject: dm io: remove struct padding Rearrange struct dm_io. Shrinks size from 40 -> 32 allowing more objects/slab. 
Signed-off-by: Richard Kennedy Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 372369b1cc20..efe969074928 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -37,8 +37,8 @@ static DEFINE_SPINLOCK(_minor_lock); struct dm_io { struct mapped_device *md; int error; - struct bio *bio; atomic_t io_count; + struct bio *bio; unsigned long start_time; }; -- cgit v1.2.3 From 148acff615b403168cdf39e55bfcfaa6e4a7d233 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 21 Jul 2008 12:00:30 +0100 Subject: dm mpath: return parameter error Return a specific error message if there is an invalid number of multipath arguments. This invalid command returns an "Unknown error" because the ti->error field is not set: dmsetup create --table '0 2 multipath 0 0 1 1 round-robin 0 1 1 /dev/sdh' mpath0 Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-mpath.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 9f570b2ab7b4..230d7890ec8a 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -625,8 +625,10 @@ static struct priority_group *parse_priority_group(struct arg_set *as, struct pgpath *pgpath; struct arg_set path_args; - if (as->argc < nr_params) + if (as->argc < nr_params) { + ti->error = "not enough path parameters"; goto bad; + } path_args.argc = nr_params; path_args.argv = as->argv; -- cgit v1.2.3 From def052d21c4e77975bb75cf212f018ec913f8e2f Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Mon, 21 Jul 2008 12:00:31 +0100 Subject: dm mpath: fix test for reinstate_path Fix test for reinstate_path method before attempting to use it. 
Signed-off-by: Alasdair G Kergon Cc: Julia Lawall --- drivers/md/dm-mpath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 230d7890ec8a..fea966d66f98 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -871,7 +871,7 @@ static int reinstate_path(struct pgpath *pgpath) if (pgpath->path.is_active) goto out; - if (!pgpath->pg->ps.type) { + if (!pgpath->pg->ps.type->reinstate_path) { DMWARN("Reinstate path not supported by path selector %s", pgpath->pg->ps.type->name); r = -EINVAL; -- cgit v1.2.3 From cd45daffd1f7b53aac0835b23e97f814ec3f10dc Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 21 Jul 2008 12:00:32 +0100 Subject: dm snapshot: track snapshot reads Whenever a snapshot read gets mapped through to the origin, track it in a per-snapshot hash table indexed by chunk number, using memory allocated from a new per-snapshot mempool. We need to track these reads to avoid race conditions which will be fixed by patches that follow. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-snap.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++----- drivers/md/dm-snap.h | 9 +++++ 2 files changed, 106 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 1ba8a47d61b1..de302702ab3e 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -40,6 +40,11 @@ */ #define SNAPSHOT_PAGES (((1UL << 20) >> PAGE_SHIFT) ? : 1) +/* + * The size of the mempool used to track chunks in use. 
+ */ +#define MIN_IOS 256 + static struct workqueue_struct *ksnapd; static void flush_queued_bios(struct work_struct *work); @@ -93,6 +98,42 @@ static struct kmem_cache *exception_cache; static struct kmem_cache *pending_cache; static mempool_t *pending_pool; +struct dm_snap_tracked_chunk { + struct hlist_node node; + chunk_t chunk; +}; + +static struct kmem_cache *tracked_chunk_cache; + +static struct dm_snap_tracked_chunk *track_chunk(struct dm_snapshot *s, + chunk_t chunk) +{ + struct dm_snap_tracked_chunk *c = mempool_alloc(s->tracked_chunk_pool, + GFP_NOIO); + unsigned long flags; + + c->chunk = chunk; + + spin_lock_irqsave(&s->tracked_chunk_lock, flags); + hlist_add_head(&c->node, + &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]); + spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); + + return c; +} + +static void stop_tracking_chunk(struct dm_snapshot *s, + struct dm_snap_tracked_chunk *c) +{ + unsigned long flags; + + spin_lock_irqsave(&s->tracked_chunk_lock, flags); + hlist_del(&c->node); + spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); + + mempool_free(c, s->tracked_chunk_pool); +} + /* * One of these per registered origin, held in the snapshot_origins hash */ @@ -482,6 +523,7 @@ static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg, static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct dm_snapshot *s; + int i; int r = -EINVAL; char persistent; char *origin_path; @@ -564,11 +606,24 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad5; } + s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS, + tracked_chunk_cache); + if (!s->tracked_chunk_pool) { + ti->error = "Could not allocate tracked_chunk mempool for " + "tracking reads"; + goto bad6; + } + + for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) + INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]); + + spin_lock_init(&s->tracked_chunk_lock); + /* Metadata must only be loaded into one table at once */ 
r = s->store.read_metadata(&s->store); if (r < 0) { ti->error = "Failed to read snapshot metadata"; - goto bad6; + goto bad_load_and_register; } else if (r > 0) { s->valid = 0; DMWARN("Snapshot is marked invalid."); @@ -582,7 +637,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (register_snapshot(s)) { r = -EINVAL; ti->error = "Cannot register snapshot origin"; - goto bad6; + goto bad_load_and_register; } ti->private = s; @@ -590,6 +645,9 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) return 0; + bad_load_and_register: + mempool_destroy(s->tracked_chunk_pool); + bad6: dm_kcopyd_client_destroy(s->kcopyd_client); @@ -624,6 +682,9 @@ static void __free_exceptions(struct dm_snapshot *s) static void snapshot_dtr(struct dm_target *ti) { +#ifdef CONFIG_DM_DEBUG + int i; +#endif struct dm_snapshot *s = ti->private; flush_workqueue(ksnapd); @@ -632,6 +693,13 @@ static void snapshot_dtr(struct dm_target *ti) /* After this returns there can be no new kcopyd jobs. */ unregister_snapshot(s); +#ifdef CONFIG_DM_DEBUG + for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) + BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i])); +#endif + + mempool_destroy(s->tracked_chunk_pool); + __free_exceptions(s); dm_put_device(ti, s->origin); @@ -974,14 +1042,10 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, start_copy(pe); goto out; } - } else - /* - * FIXME: this read path scares me because we - * always use the origin when we have a pending - * exception. However I can't think of a - * situation where this is wrong - ejt. 
- */ + } else { bio->bi_bdev = s->origin->bdev; + map_context->ptr = track_chunk(s, chunk); + } out_unlock: up_write(&s->lock); @@ -989,6 +1053,18 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, return r; } +static int snapshot_end_io(struct dm_target *ti, struct bio *bio, + int error, union map_info *map_context) +{ + struct dm_snapshot *s = ti->private; + struct dm_snap_tracked_chunk *c = map_context->ptr; + + if (c) + stop_tracking_chunk(s, c); + + return 0; +} + static void snapshot_resume(struct dm_target *ti) { struct dm_snapshot *s = ti->private; @@ -1266,6 +1342,7 @@ static struct target_type snapshot_target = { .ctr = snapshot_ctr, .dtr = snapshot_dtr, .map = snapshot_map, + .end_io = snapshot_end_io, .resume = snapshot_resume, .status = snapshot_status, }; @@ -1306,11 +1383,18 @@ static int __init dm_snapshot_init(void) goto bad4; } + tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0); + if (!tracked_chunk_cache) { + DMERR("Couldn't create cache to track chunks in use."); + r = -ENOMEM; + goto bad5; + } + pending_pool = mempool_create_slab_pool(128, pending_cache); if (!pending_pool) { DMERR("Couldn't create pending pool."); r = -ENOMEM; - goto bad5; + goto bad_pending_pool; } ksnapd = create_singlethread_workqueue("ksnapd"); @@ -1324,6 +1408,8 @@ static int __init dm_snapshot_init(void) bad6: mempool_destroy(pending_pool); + bad_pending_pool: + kmem_cache_destroy(tracked_chunk_cache); bad5: kmem_cache_destroy(pending_cache); bad4: @@ -1355,6 +1441,7 @@ static void __exit dm_snapshot_exit(void) mempool_destroy(pending_pool); kmem_cache_destroy(pending_cache); kmem_cache_destroy(exception_cache); + kmem_cache_destroy(tracked_chunk_cache); } /* Module hooks */ diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h index 24f9fb73b982..70dc961f40d8 100644 --- a/drivers/md/dm-snap.h +++ b/drivers/md/dm-snap.h @@ -130,6 +130,10 @@ struct exception_store { void *context; }; +#define DM_TRACKED_CHUNK_HASH_SIZE 16 +#define 
DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \ + (DM_TRACKED_CHUNK_HASH_SIZE - 1)) + struct dm_snapshot { struct rw_semaphore lock; struct dm_target *ti; @@ -174,6 +178,11 @@ struct dm_snapshot { /* Queue of snapshot writes for ksnapd to flush */ struct bio_list queued_bios; struct work_struct queued_bios_work; + + /* Chunks with outstanding reads */ + mempool_t *tracked_chunk_pool; + spinlock_t tracked_chunk_lock; + struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; }; /* -- cgit v1.2.3 From a8d41b59f3f5a7ac19452ef442a7fc1b5fa17366 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 21 Jul 2008 12:00:34 +0100 Subject: dm snapshot: fix race during exception creation Fix a race condition that returns incorrect data when a write causes an exception to be allocated whilst a read is still in flight. The race condition happens as follows: * A read to non-reallocated sector in the snapshot is submitted so that the read is routed to the original device. * A write to the original device is submitted. The write causes an exception that reallocates the block. The write proceeds. * The original read is dequeued and reads the wrong data. This race can be triggered with CFQ scheduler and one thread writing and multiple threads reading simultaneously. (This patch relies upon the earlier dm-kcopyd-per-device.patch to avoid a deadlock.) 
Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-snap.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'drivers') diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index de302702ab3e..f4fd0cee9c3d 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -134,6 +134,27 @@ static void stop_tracking_chunk(struct dm_snapshot *s, mempool_free(c, s->tracked_chunk_pool); } +static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk) +{ + struct dm_snap_tracked_chunk *c; + struct hlist_node *hn; + int found = 0; + + spin_lock_irq(&s->tracked_chunk_lock); + + hlist_for_each_entry(c, hn, + &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) { + if (c->chunk == chunk) { + found = 1; + break; + } + } + + spin_unlock_irq(&s->tracked_chunk_lock); + + return found; +} + /* * One of these per registered origin, held in the snapshot_origins hash */ @@ -839,6 +860,13 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success) goto out; } + /* + * Check for conflicting reads. This is extremely improbable, + * so yield() is sufficient and there is no need for a wait queue. + */ + while (__chunk_is_tracked(s, pe->e.old_chunk)) + yield(); + /* * Add a proper exception, and remove the * in-flight exception from the list. -- cgit v1.2.3 From 92e868122edf08b9fc06b112e7e0c80ab94c1f93 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 21 Jul 2008 12:00:35 +0100 Subject: dm snapshot: use per device mempools Change snapshot per-module mempool to per-device mempool. Per-module mempools could cause a deadlock if multiple snapshot devices are stacked above each other. 
Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-snap.c | 40 ++++++++++++++++++++++------------------ drivers/md/dm-snap.h | 2 ++ 2 files changed, 24 insertions(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index f4fd0cee9c3d..6e5528aecc98 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -96,7 +96,6 @@ struct dm_snap_pending_exception { */ static struct kmem_cache *exception_cache; static struct kmem_cache *pending_cache; -static mempool_t *pending_pool; struct dm_snap_tracked_chunk { struct hlist_node node; @@ -364,14 +363,19 @@ static void free_exception(struct dm_snap_exception *e) kmem_cache_free(exception_cache, e); } -static struct dm_snap_pending_exception *alloc_pending_exception(void) +static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snapshot *s) { - return mempool_alloc(pending_pool, GFP_NOIO); + struct dm_snap_pending_exception *pe = mempool_alloc(s->pending_pool, + GFP_NOIO); + + pe->snap = s; + + return pe; } static void free_pending_exception(struct dm_snap_pending_exception *pe) { - mempool_free(pe, pending_pool); + mempool_free(pe, pe->snap->pending_pool); } static void insert_completed_exception(struct dm_snapshot *s, @@ -627,12 +631,18 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad5; } + s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache); + if (!s->pending_pool) { + ti->error = "Could not allocate mempool for pending exceptions"; + goto bad6; + } + s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS, tracked_chunk_cache); if (!s->tracked_chunk_pool) { ti->error = "Could not allocate tracked_chunk mempool for " "tracking reads"; - goto bad6; + goto bad_tracked_chunk_pool; } for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) @@ -669,6 +679,9 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) bad_load_and_register: 
mempool_destroy(s->tracked_chunk_pool); + bad_tracked_chunk_pool: + mempool_destroy(s->pending_pool); + bad6: dm_kcopyd_client_destroy(s->kcopyd_client); @@ -723,6 +736,8 @@ static void snapshot_dtr(struct dm_target *ti) __free_exceptions(s); + mempool_destroy(s->pending_pool); + dm_put_device(ti, s->origin); dm_put_device(ti, s->cow); @@ -969,7 +984,7 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio) * to hold the lock while we do this. */ up_write(&s->lock); - pe = alloc_pending_exception(); + pe = alloc_pending_exception(s); down_write(&s->lock); if (!s->valid) { @@ -989,7 +1004,6 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio) bio_list_init(&pe->snapshot_bios); pe->primary_pe = NULL; atomic_set(&pe->ref_count, 0); - pe->snap = s; pe->started = 0; if (s->store.prepare_exception(&s->store, &pe->e)) { @@ -1418,24 +1432,15 @@ static int __init dm_snapshot_init(void) goto bad5; } - pending_pool = mempool_create_slab_pool(128, pending_cache); - if (!pending_pool) { - DMERR("Couldn't create pending pool."); - r = -ENOMEM; - goto bad_pending_pool; - } - ksnapd = create_singlethread_workqueue("ksnapd"); if (!ksnapd) { DMERR("Failed to create ksnapd workqueue."); r = -ENOMEM; - goto bad6; + goto bad_pending_pool; } return 0; - bad6: - mempool_destroy(pending_pool); bad_pending_pool: kmem_cache_destroy(tracked_chunk_cache); bad5: @@ -1466,7 +1471,6 @@ static void __exit dm_snapshot_exit(void) DMERR("origin unregister failed %d", r); exit_origin_hash(); - mempool_destroy(pending_pool); kmem_cache_destroy(pending_cache); kmem_cache_destroy(exception_cache); kmem_cache_destroy(tracked_chunk_cache); diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h index 70dc961f40d8..292c15609ae3 100644 --- a/drivers/md/dm-snap.h +++ b/drivers/md/dm-snap.h @@ -161,6 +161,8 @@ struct dm_snapshot { /* The last percentage we notified */ int last_percent; + mempool_t *pending_pool; + struct exception_table pending; struct exception_table complete; -- 
cgit v1.2.3 From f6fccb1213ba3d661baeb2a5eee0a9701dc03e1b Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Mon, 21 Jul 2008 12:00:37 +0100 Subject: dm: introduce merge_bvec_fn Introduce a bvec merge function for device mapper devices for dynamic size restrictions. This code ensures the requested biovec lies within a single target and then calls a target-specific function to check against any constraints imposed by underlying devices. Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 44 +++++++++++++++++++++++++++++++++++++++++++ include/linux/device-mapper.h | 6 ++++++ include/linux/dm-ioctl.h | 4 ++-- 3 files changed, 52 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index efe969074928..bca448e11878 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -829,6 +829,49 @@ static int __split_bio(struct mapped_device *md, struct bio *bio) * CRUD END *---------------------------------------------------------------*/ +static int dm_merge_bvec(struct request_queue *q, + struct bvec_merge_data *bvm, + struct bio_vec *biovec) +{ + struct mapped_device *md = q->queuedata; + struct dm_table *map = dm_get_table(md); + struct dm_target *ti; + sector_t max_sectors; + int max_size; + + if (unlikely(!map)) + return 0; + + ti = dm_table_find_target(map, bvm->bi_sector); + + /* + * Find maximum amount of I/O that won't need splitting + */ + max_sectors = min(max_io_len(md, bvm->bi_sector, ti), + (sector_t) BIO_MAX_SECTORS); + max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size; + if (max_size < 0) + max_size = 0; + + /* + * merge_bvec_fn() returns number of bytes + * it can accept at this offset + * max is precomputed maximal io size + */ + if (max_size && ti->type->merge) + max_size = ti->type->merge(ti, bvm, biovec, max_size); + + /* + * Always allow an entire first page + */ + if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT)) + max_size = biovec->bv_len; + + dm_table_put(map); 
+ + return max_size; +} + /* * The request function that just remaps the bio built up by * dm_merge_bvec. @@ -1032,6 +1075,7 @@ static struct mapped_device *alloc_dev(int minor) blk_queue_make_request(md->queue, dm_request); blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); md->queue->unplug_fn = dm_unplug_all; + blk_queue_merge_bvec(md->queue, dm_merge_bvec); md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache); if (!md->io_pool) diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 0d8d419d191a..a90222e3297d 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -9,11 +9,13 @@ #define _LINUX_DEVICE_MAPPER_H #include +#include struct dm_target; struct dm_table; struct dm_dev; struct mapped_device; +struct bio_vec; typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; @@ -72,6 +74,9 @@ typedef int (*dm_ioctl_fn) (struct dm_target *ti, struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); +typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm, + struct bio_vec *biovec, int max_size); + void dm_error(const char *message); /* @@ -107,6 +112,7 @@ struct target_type { dm_status_fn status; dm_message_fn message; dm_ioctl_fn ioctl; + dm_merge_fn merge; }; struct io_restrictions { diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h index b03c41bbfa14..28c2940eb30d 100644 --- a/include/linux/dm-ioctl.h +++ b/include/linux/dm-ioctl.h @@ -256,9 +256,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 13 +#define DM_VERSION_MINOR 14 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2007-10-18)" +#define DM_VERSION_EXTRA "-ioctl (2008-04-23)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.2.3 From 7bc3447b692185c5ea78bee93d0ef1dee2fd7ce7 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Mon, 21 Jul 2008 
12:00:38 +0100 Subject: dm: linear add merge This patch implements biovec merge function for linear target. If the underlying device has merge function defined, call it. If not, keep precomputed value. Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon --- drivers/md/dm-linear.c | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 17753d80ad22..6449bcdf84ca 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -69,13 +69,25 @@ static void linear_dtr(struct dm_target *ti) kfree(lc); } -static int linear_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static sector_t linear_map_sector(struct dm_target *ti, sector_t bi_sector) { - struct linear_c *lc = (struct linear_c *) ti->private; + struct linear_c *lc = ti->private; + + return lc->start + (bi_sector - ti->begin); +} + +static void linear_map_bio(struct dm_target *ti, struct bio *bio) +{ + struct linear_c *lc = ti->private; bio->bi_bdev = lc->dev->bdev; - bio->bi_sector = lc->start + (bio->bi_sector - ti->begin); + bio->bi_sector = linear_map_sector(ti, bio->bi_sector); +} + +static int linear_map(struct dm_target *ti, struct bio *bio, + union map_info *map_context) +{ + linear_map_bio(ti, bio); return DM_MAPIO_REMAPPED; } @@ -114,15 +126,31 @@ static int linear_ioctl(struct dm_target *ti, struct inode *inode, return blkdev_driver_ioctl(bdev->bd_inode, &fake_file, bdev->bd_disk, cmd, arg); } +static int linear_merge(struct dm_target *ti, struct bvec_merge_data *bvm, + struct bio_vec *biovec, int max_size) +{ + struct linear_c *lc = ti->private; + struct request_queue *q = bdev_get_queue(lc->dev->bdev); + + if (!q->merge_bvec_fn) + return max_size; + + bvm->bi_bdev = lc->dev->bdev; + bvm->bi_sector = linear_map_sector(ti, bvm->bi_sector); + + return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); +} + static struct target_type 
linear_target = { .name = "linear", - .version= {1, 0, 2}, + .version= {1, 0, 3}, .module = THIS_MODULE, .ctr = linear_ctr, .dtr = linear_dtr, .map = linear_map, .status = linear_status, .ioctl = linear_ioctl, + .merge = linear_merge, }; int __init dm_linear_init(void) -- cgit v1.2.3 From 9980c638a666ecd88acaf0a7ab91043d4a3f44d1 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Mon, 21 Jul 2008 12:00:39 +0100 Subject: dm table: remove merge_bvec sector restriction Remove max_sector restriction - merge function replaced it. Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon --- drivers/md/dm-table.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 94116eaf4709..798e468103b8 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -506,14 +506,13 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev) rs->max_sectors = min_not_zero(rs->max_sectors, q->max_sectors); - /* FIXME: Device-Mapper on top of RAID-0 breaks because DM - * currently doesn't honor MD's merge_bvec_fn routine. - * In this case, we'll force DM to use PAGE_SIZE or - * smaller I/O, just to be safe. A better fix is in the - * works, but add this for the time being so it will at - * least operate correctly. + /* + * Check if merge fn is supported. + * If not we'll force DM to use PAGE_SIZE or + * smaller I/O, just to be safe. */ - if (q->merge_bvec_fn) + + if (q->merge_bvec_fn && !ti->type->merge) rs->max_sectors = min_not_zero(rs->max_sectors, (unsigned int) (PAGE_SIZE >> 9)); -- cgit v1.2.3 From d41e26b901111f4e540aa2c27ec7a1681c782be9 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Mon, 21 Jul 2008 12:00:40 +0100 Subject: dm crypt: add merge This patch implements biovec merge function for crypt target. If the underlying device has merge function defined, call it. If not, keep precomputed value. 
Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index ab6a61db63ce..13956437bc81 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1216,9 +1216,24 @@ error: return -EINVAL; } +static int crypt_merge(struct dm_target *ti, struct bvec_merge_data *bvm, + struct bio_vec *biovec, int max_size) +{ + struct crypt_config *cc = ti->private; + struct request_queue *q = bdev_get_queue(cc->dev->bdev); + + if (!q->merge_bvec_fn) + return max_size; + + bvm->bi_bdev = cc->dev->bdev; + bvm->bi_sector = cc->start + bvm->bi_sector - ti->begin; + + return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); +} + static struct target_type crypt_target = { .name = "crypt", - .version= {1, 5, 0}, + .version= {1, 6, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, @@ -1228,6 +1243,7 @@ static struct target_type crypt_target = { .preresume = crypt_preresume, .resume = crypt_resume, .message = crypt_message, + .merge = crypt_merge, }; static int __init dm_crypt_init(void) -- cgit v1.2.3