diff options
author | Joe Thornber <ejt@redhat.com> | 2016-10-21 10:06:40 -0400 |
---|---|---|
committer | Mike Snitzer <snitzer@redhat.com> | 2017-03-07 11:30:16 -0500 |
commit | 742c8fdc31e820503f9267070311d894978d1349 (patch) | |
tree | 42aba7a8ad9cd1db52aa9e3a0a14d3fd0112c65e /drivers/md | |
parent | c1ae3cfa0e89fa1a7ecc4c99031f5e9ae99d9201 (diff) | |
download | lwn-742c8fdc31e820503f9267070311d894978d1349.tar.gz lwn-742c8fdc31e820503f9267070311d894978d1349.zip |
dm bio prison v2: new interface for the bio prison
The deferred set is gone and all methods have _v2 appended to the end of
their names to allow for continued use of the original bio prison in DM
thin-provisioning.
Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/Makefile | 1 | ||||
-rw-r--r-- | drivers/md/dm-bio-prison-v1.c (renamed from drivers/md/dm-bio-prison.c) | 46 | ||||
-rw-r--r-- | drivers/md/dm-bio-prison-v1.h (renamed from drivers/md/dm-bio-prison.h) | 2 | ||||
-rw-r--r-- | drivers/md/dm-bio-prison-v2.c | 369 | ||||
-rw-r--r-- | drivers/md/dm-bio-prison-v2.h | 152 | ||||
-rw-r--r-- | drivers/md/dm-cache-target.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-thin.c | 2 |
7 files changed, 568 insertions, 6 deletions
diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 3cbda1af87a0..d378b1db7852 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -11,6 +11,7 @@ dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \ dm-mirror-y += dm-raid1.o dm-log-userspace-y \ += dm-log-userspace-base.o dm-log-userspace-transfer.o +dm-bio-prison-y += dm-bio-prison-v1.o dm-bio-prison-v2.o dm-thin-pool-y += dm-thin.o dm-thin-metadata.o dm-cache-y += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o dm-cache-smq-y += dm-cache-policy-smq.o diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison-v1.c index 03af174485d3..ae7da2c30a57 100644 --- a/drivers/md/dm-bio-prison.c +++ b/drivers/md/dm-bio-prison-v1.c @@ -5,7 +5,8 @@ */ #include "dm.h" -#include "dm-bio-prison.h" +#include "dm-bio-prison-v1.h" +#include "dm-bio-prison-v2.h" #include <linux/spinlock.h> #include <linux/mempool.h> @@ -398,7 +399,7 @@ EXPORT_SYMBOL_GPL(dm_deferred_set_add_work); /*----------------------------------------------------------------*/ -static int __init dm_bio_prison_init(void) +static int __init dm_bio_prison_init_v1(void) { _cell_cache = KMEM_CACHE(dm_bio_prison_cell, 0); if (!_cell_cache) @@ -407,12 +408,51 @@ static int __init dm_bio_prison_init(void) return 0; } -static void __exit dm_bio_prison_exit(void) +static void dm_bio_prison_exit_v1(void) { kmem_cache_destroy(_cell_cache); _cell_cache = NULL; } +static int (*_inits[])(void) __initdata = { + dm_bio_prison_init_v1, + dm_bio_prison_init_v2, +}; + +static void (*_exits[])(void) = { + dm_bio_prison_exit_v1, + dm_bio_prison_exit_v2, +}; + +static int __init dm_bio_prison_init(void) +{ + const int count = ARRAY_SIZE(_inits); + + int r, i; + + for (i = 0; i < count; i++) { + r = _inits[i](); + if (r) + goto bad; + } + + return 0; + + bad: + while (i--) + _exits[i](); + + return r; +} + +static void __exit dm_bio_prison_exit(void) +{ + int i = ARRAY_SIZE(_exits); + + while (i--) + _exits[i](); +} + /* * module hooks */ diff --git a/drivers/md/dm-bio-prison.h b/drivers/md/dm-bio-prison-v1.h index 54352f009bfd..cddd4ac07e2c 100644 --- a/drivers/md/dm-bio-prison.h +++ b/drivers/md/dm-bio-prison-v1.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2012 Red Hat, Inc. + * Copyright (C) 2011-2017 Red Hat, Inc. * * This file is released under the GPL. */ diff --git a/drivers/md/dm-bio-prison-v2.c b/drivers/md/dm-bio-prison-v2.c new file mode 100644 index 000000000000..c9b11f799cd8 --- /dev/null +++ b/drivers/md/dm-bio-prison-v2.c @@ -0,0 +1,369 @@ +/* + * Copyright (C) 2012-2017 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#include "dm.h" +#include "dm-bio-prison-v2.h" + +#include <linux/spinlock.h> +#include <linux/mempool.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/rwsem.h> + +/*----------------------------------------------------------------*/ + +#define MIN_CELLS 1024 + +struct dm_bio_prison_v2 { + struct workqueue_struct *wq; + + spinlock_t lock; + mempool_t *cell_pool; + struct rb_root cells; +}; + +static struct kmem_cache *_cell_cache; + +/*----------------------------------------------------------------*/ + +/* + * @nr_cells should be the number of cells you want in use _concurrently_. + * Don't confuse it with the number of distinct keys. + */ +struct dm_bio_prison_v2 *dm_bio_prison_create_v2(struct workqueue_struct *wq) +{ + struct dm_bio_prison_v2 *prison = kmalloc(sizeof(*prison), GFP_KERNEL); + + if (!prison) + return NULL; + + prison->wq = wq; + spin_lock_init(&prison->lock); + + prison->cell_pool = mempool_create_slab_pool(MIN_CELLS, _cell_cache); + if (!prison->cell_pool) { + kfree(prison); + return NULL; + } + + prison->cells = RB_ROOT; + + return prison; +} +EXPORT_SYMBOL_GPL(dm_bio_prison_create_v2); + +void dm_bio_prison_destroy_v2(struct dm_bio_prison_v2 *prison) +{ + mempool_destroy(prison->cell_pool); + kfree(prison); +} +EXPORT_SYMBOL_GPL(dm_bio_prison_destroy_v2); + +struct dm_bio_prison_cell_v2 *dm_bio_prison_alloc_cell_v2(struct dm_bio_prison_v2 *prison, gfp_t gfp) +{ + return mempool_alloc(prison->cell_pool, gfp); +} +EXPORT_SYMBOL_GPL(dm_bio_prison_alloc_cell_v2); + +void dm_bio_prison_free_cell_v2(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell) +{ + mempool_free(cell, prison->cell_pool); +} +EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell_v2); + +static void __setup_new_cell(struct dm_cell_key_v2 *key, + struct dm_bio_prison_cell_v2 *cell) +{ + memset(cell, 0, sizeof(*cell)); + memcpy(&cell->key, key, sizeof(cell->key)); + bio_list_init(&cell->bios); +} + +static int cmp_keys(struct dm_cell_key_v2 *lhs, + struct dm_cell_key_v2 *rhs) +{ + if (lhs->virtual < rhs->virtual) + return -1; + + if (lhs->virtual > rhs->virtual) + return 1; + + if (lhs->dev < rhs->dev) + return -1; + + if (lhs->dev > rhs->dev) + return 1; + + if (lhs->block_end <= rhs->block_begin) + return -1; + + if (lhs->block_begin >= rhs->block_end) + return 1; + + return 0; +} + +/* + * Returns true if node found, otherwise it inserts a new one. + */ +static bool __find_or_insert(struct dm_bio_prison_v2 *prison, + struct dm_cell_key_v2 *key, + struct dm_bio_prison_cell_v2 *cell_prealloc, + struct dm_bio_prison_cell_v2 **result) +{ + int r; + struct rb_node **new = &prison->cells.rb_node, *parent = NULL; + + while (*new) { + struct dm_bio_prison_cell_v2 *cell = + container_of(*new, struct dm_bio_prison_cell_v2, node); + + r = cmp_keys(key, &cell->key); + + parent = *new; + if (r < 0) + new = &((*new)->rb_left); + + else if (r > 0) + new = &((*new)->rb_right); + + else { + *result = cell; + return true; + } + } + + __setup_new_cell(key, cell_prealloc); + *result = cell_prealloc; + rb_link_node(&cell_prealloc->node, parent, new); + rb_insert_color(&cell_prealloc->node, &prison->cells); + + return false; +} + +static bool __get(struct dm_bio_prison_v2 *prison, + struct dm_cell_key_v2 *key, + unsigned lock_level, + struct bio *inmate, + struct dm_bio_prison_cell_v2 *cell_prealloc, + struct dm_bio_prison_cell_v2 **cell) +{ + if (__find_or_insert(prison, key, cell_prealloc, cell)) { + if ((*cell)->exclusive_lock) { + if (lock_level <= (*cell)->exclusive_level) { + bio_list_add(&(*cell)->bios, inmate); + return false; + } + } + + (*cell)->shared_count++; + + } else + (*cell)->shared_count = 1; + + return true; +} + +bool dm_cell_get_v2(struct dm_bio_prison_v2 *prison, + struct dm_cell_key_v2 *key, + unsigned lock_level, + struct bio *inmate, + struct dm_bio_prison_cell_v2 *cell_prealloc, + struct dm_bio_prison_cell_v2 **cell_result) +{ + int r; + unsigned long flags; + + spin_lock_irqsave(&prison->lock, flags); + r = __get(prison, key, lock_level, inmate, cell_prealloc, cell_result); + spin_unlock_irqrestore(&prison->lock, flags); + + return r; +} +EXPORT_SYMBOL_GPL(dm_cell_get_v2); + +static bool __put(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell) +{ + BUG_ON(!cell->shared_count); + cell->shared_count--; + + // FIXME: shared locks granted above the lock level could starve this + if (!cell->shared_count) { + if (cell->exclusive_lock){ + if (cell->quiesce_continuation) { + queue_work(prison->wq, cell->quiesce_continuation); + cell->quiesce_continuation = NULL; + } + } else { + rb_erase(&cell->node, &prison->cells); + return true; + } + } + + return false; +} + +bool dm_cell_put_v2(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell) +{ + bool r; + unsigned long flags; + + spin_lock_irqsave(&prison->lock, flags); + r = __put(prison, cell); + spin_unlock_irqrestore(&prison->lock, flags); + + return r; +} +EXPORT_SYMBOL_GPL(dm_cell_put_v2); + +static int __lock(struct dm_bio_prison_v2 *prison, + struct dm_cell_key_v2 *key, + unsigned lock_level, + struct dm_bio_prison_cell_v2 *cell_prealloc, + struct dm_bio_prison_cell_v2 **cell_result) +{ + struct dm_bio_prison_cell_v2 *cell; + + if (__find_or_insert(prison, key, cell_prealloc, &cell)) { + if (cell->exclusive_lock) + return -EBUSY; + + cell->exclusive_lock = true; + cell->exclusive_level = lock_level; + *cell_result = cell; + + // FIXME: we don't yet know what level these shared locks + // were taken at, so have to quiesce them all. + return cell->shared_count > 0; + + } else { + cell = cell_prealloc; + cell->shared_count = 0; + cell->exclusive_lock = true; + cell->exclusive_level = lock_level; + *cell_result = cell; + } + + return 0; +} + +int dm_cell_lock_v2(struct dm_bio_prison_v2 *prison, + struct dm_cell_key_v2 *key, + unsigned lock_level, + struct dm_bio_prison_cell_v2 *cell_prealloc, + struct dm_bio_prison_cell_v2 **cell_result) +{ + int r; + unsigned long flags; + + spin_lock_irqsave(&prison->lock, flags); + r = __lock(prison, key, lock_level, cell_prealloc, cell_result); + spin_unlock_irqrestore(&prison->lock, flags); + + return r; +} +EXPORT_SYMBOL_GPL(dm_cell_lock_v2); + +static void __quiesce(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell, + struct work_struct *continuation) +{ + if (!cell->shared_count) + queue_work(prison->wq, continuation); + else + cell->quiesce_continuation = continuation; +} + +void dm_cell_quiesce_v2(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell, + struct work_struct *continuation) +{ + unsigned long flags; + + spin_lock_irqsave(&prison->lock, flags); + __quiesce(prison, cell, continuation); + spin_unlock_irqrestore(&prison->lock, flags); +} +EXPORT_SYMBOL_GPL(dm_cell_quiesce_v2); + +static int __promote(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell, + unsigned new_lock_level) +{ + if (!cell->exclusive_lock) + return -EINVAL; + + cell->exclusive_level = new_lock_level; + return cell->shared_count > 0; +} + +int dm_cell_lock_promote_v2(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell, + unsigned new_lock_level) +{ + int r; + unsigned long flags; + + spin_lock_irqsave(&prison->lock, flags); + r = __promote(prison, cell, new_lock_level); + spin_unlock_irqrestore(&prison->lock, flags); + + return r; +} +EXPORT_SYMBOL_GPL(dm_cell_lock_promote_v2); + +static bool __unlock(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell, + struct bio_list *bios) +{ + BUG_ON(!cell->exclusive_lock); + + bio_list_merge(bios, &cell->bios); + bio_list_init(&cell->bios); + + if (cell->shared_count) { + cell->exclusive_lock = 0; + return false; + } + + rb_erase(&cell->node, &prison->cells); + return true; +} + +bool dm_cell_unlock_v2(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell, + struct bio_list *bios) +{ + bool r; + unsigned long flags; + + spin_lock_irqsave(&prison->lock, flags); + r = __unlock(prison, cell, bios); + spin_unlock_irqrestore(&prison->lock, flags); + + return r; +} +EXPORT_SYMBOL_GPL(dm_cell_unlock_v2); + +/*----------------------------------------------------------------*/ + +int __init dm_bio_prison_init_v2(void) +{ + _cell_cache = KMEM_CACHE(dm_bio_prison_cell_v2, 0); + if (!_cell_cache) + return -ENOMEM; + + return 0; +} + +void dm_bio_prison_exit_v2(void) +{ + kmem_cache_destroy(_cell_cache); + _cell_cache = NULL; +} diff --git a/drivers/md/dm-bio-prison-v2.h b/drivers/md/dm-bio-prison-v2.h new file mode 100644 index 000000000000..6e04234268db --- /dev/null +++ b/drivers/md/dm-bio-prison-v2.h @@ -0,0 +1,152 @@ +/* + * Copyright (C) 2011-2017 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#ifndef DM_BIO_PRISON_V2_H +#define DM_BIO_PRISON_V2_H + +#include "persistent-data/dm-block-manager.h" /* FIXME: for dm_block_t */ +#include "dm-thin-metadata.h" /* FIXME: for dm_thin_id */ + +#include <linux/bio.h> +#include <linux/rbtree.h> +#include <linux/workqueue.h> + +/*----------------------------------------------------------------*/ + +int dm_bio_prison_init_v2(void); +void dm_bio_prison_exit_v2(void); + +/* + * Sometimes we can't deal with a bio straight away. We put them in prison + * where they can't cause any mischief. Bios are put in a cell identified + * by a key, multiple bios can be in the same cell. When the cell is + * subsequently unlocked the bios become available. + */ +struct dm_bio_prison_v2; + +/* + * Keys define a range of blocks within either a virtual or physical + * device. + */ +struct dm_cell_key_v2 { + int virtual; + dm_thin_id dev; + dm_block_t block_begin, block_end; +}; + +/* + * Treat this as opaque, only in header so callers can manage allocation + * themselves. + */ +struct dm_bio_prison_cell_v2 { + // FIXME: pack these + bool exclusive_lock; + unsigned exclusive_level; + unsigned shared_count; + struct work_struct *quiesce_continuation; + + struct rb_node node; + struct dm_cell_key_v2 key; + struct bio_list bios; +}; + +struct dm_bio_prison_v2 *dm_bio_prison_create_v2(struct workqueue_struct *wq); +void dm_bio_prison_destroy_v2(struct dm_bio_prison_v2 *prison); + +/* + * These two functions just wrap a mempool. This is a transitory step: + * Eventually all bio prison clients should manage their own cell memory. + * + * Like mempool_alloc(), dm_bio_prison_alloc_cell_v2() can only fail if called + * in interrupt context or passed GFP_NOWAIT. + */ +struct dm_bio_prison_cell_v2 *dm_bio_prison_alloc_cell_v2(struct dm_bio_prison_v2 *prison, + gfp_t gfp); +void dm_bio_prison_free_cell_v2(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell); + +/* + * Shared locks have a bio associated with them. + * + * If the lock is granted the caller can continue to use the bio, and must + * call dm_cell_put_v2() to drop the reference count when finished using it. + * + * If the lock cannot be granted then the bio will be tracked within the + * cell, and later given to the holder of the exclusive lock. + * + * See dm_cell_lock_v2() for discussion of the lock_level parameter. + * + * Compare *cell_result with cell_prealloc to see if the prealloc was used. + * If cell_prealloc was used then inmate wasn't added to it. + * + * Returns true if the lock is granted. + */ +bool dm_cell_get_v2(struct dm_bio_prison_v2 *prison, + struct dm_cell_key_v2 *key, + unsigned lock_level, + struct bio *inmate, + struct dm_bio_prison_cell_v2 *cell_prealloc, + struct dm_bio_prison_cell_v2 **cell_result); + +/* + * Decrement the shared reference count for the lock. Returns true if + * returning ownership of the cell (ie. you should free it). + */ +bool dm_cell_put_v2(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell); + +/* + * Locks a cell. No associated bio. Exclusive locks get priority. These + * locks constrain whether the io locks are granted according to level. + * + * Shared locks will still be granted if the lock_level is > (not = to) the + * exclusive lock level. + * + * If an _exclusive_ lock is already held then -EBUSY is returned. + * + * Return values: + * < 0 - error + * 0 - locked; no quiescing needed + * 1 - locked; quiescing needed + */ +int dm_cell_lock_v2(struct dm_bio_prison_v2 *prison, + struct dm_cell_key_v2 *key, + unsigned lock_level, + struct dm_bio_prison_cell_v2 *cell_prealloc, + struct dm_bio_prison_cell_v2 **cell_result); + +void dm_cell_quiesce_v2(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell, + struct work_struct *continuation); + +/* + * Promotes an _exclusive_ lock to a higher lock level. + * + * Return values: + * < 0 - error + * 0 - promoted; no quiescing needed + * 1 - promoted; quiescing needed + */ +int dm_cell_lock_promote_v2(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell, + unsigned new_lock_level); + +/* + * Adds any held bios to the bio list. + * + * There may be shared locks still held at this point even if you quiesced + * (ie. different lock levels). + * + * Returns true if returning ownership of the cell (ie. you should free + * it). + */ +bool dm_cell_unlock_v2(struct dm_bio_prison_v2 *prison, + struct dm_bio_prison_cell_v2 *cell, + struct bio_list *bios); + +/*----------------------------------------------------------------*/ + +#endif diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 9c689b34e6e7..2eaa414e1509 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -5,7 +5,7 @@ */ #include "dm.h" -#include "dm-bio-prison.h" +#include "dm-bio-prison-v1.h" #include "dm-bio-record.h" #include "dm-cache-metadata.h" diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 2b266a2b5035..9b3e2fcbfb1b 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -5,7 +5,7 @@ */ #include "dm-thin-metadata.h" -#include "dm-bio-prison.h" +#include "dm-bio-prison-v1.h" #include "dm.h" #include <linux/device-mapper.h> |