diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-14 15:50:56 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-14 15:50:56 -0800 |
commit | b91593fa8531a7396551dd9c0a0c51e9b9b97ca9 (patch) | |
tree | 10b78272c77e6a2872c2b02bd12b24f484a3eead | |
parent | e2c5923c349c1738fe8fda980874d93f6fb2e5b6 (diff) | |
parent | ef7afb3656854de04fe03b0b9b4f3722b5722d8d (diff) | |
download | lwn-b91593fa8531a7396551dd9c0a0c51e9b9b97ca9.tar.gz lwn-b91593fa8531a7396551dd9c0a0c51e9b9b97ca9.zip |
Merge tag 'for-4.15/dm' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:
- a few conversions from atomic_t to ref_count_t
- a DM core fix for a race during device destruction that could result
in a BUG_ON
- a stable@ fix for a DM cache race condition that could lead to data
corruption when operating in writeback mode (writethrough is default)
- various DM cache cleanups and improvements
- add DAX support to the DM log-writes target
- a fix for the DM zoned target's ability to deal with the last zone of
the drive being smaller than all others
- a stable@ DM crypt and DM integrity fix for a negative check that was
to restrictive (prevented slab debug with XFS ontop of DM crypt from
working)
- a DM raid target fix for a panic that can occur when forcing a raid
to sync
* tag 'for-4.15/dm' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (25 commits)
dm cache: lift common migration preparation code to alloc_migration()
dm cache: remove usused deferred_cells member from struct cache
dm cache policy smq: allocate cache blocks in order
dm cache policy smq: change max background work from 10240 to 4096 blocks
dm cache background tracker: limit amount of background work that may be issued at once
dm cache policy smq: take origin idle status into account when queuing writebacks
dm cache policy smq: handle races with queuing background_work
dm raid: fix panic when attempting to force a raid to sync
dm integrity: allow unaligned bv_offset
dm crypt: allow unaligned bv_offset
dm: small cleanup in dm_get_md()
dm: fix race between dm_get_from_kobject() and __dm_destroy()
dm: allocate struct mapped_device with kvzalloc
dm zoned: ignore last smaller runt zone
dm space map metadata: use ARRAY_SIZE
dm log writes: add support for DAX
dm log writes: add support for inline data buffers
dm cache: simplify get_per_bio_data() by removing data_size argument
dm cache: remove all obsolete writethrough-specific code
dm cache: submit writethrough writes in parallel to origin and cache
...
-rw-r--r-- | drivers/md/dm-cache-background-tracker.c | 18 | ||||
-rw-r--r-- | drivers/md/dm-cache-metadata.c | 9 | ||||
-rw-r--r-- | drivers/md/dm-cache-policy-smq.c | 42 | ||||
-rw-r--r-- | drivers/md/dm-cache-target.c | 326 | ||||
-rw-r--r-- | drivers/md/dm-core.h | 3 | ||||
-rw-r--r-- | drivers/md/dm-crypt.c | 4 | ||||
-rw-r--r-- | drivers/md/dm-integrity.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-log-writes.c | 175 | ||||
-rw-r--r-- | drivers/md/dm-raid.c | 21 | ||||
-rw-r--r-- | drivers/md/dm-table.c | 6 | ||||
-rw-r--r-- | drivers/md/dm-zoned-target.c | 13 | ||||
-rw-r--r-- | drivers/md/dm.c | 45 | ||||
-rw-r--r-- | drivers/md/dm.h | 3 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-space-map-metadata.c | 3 |
14 files changed, 408 insertions, 262 deletions
diff --git a/drivers/md/dm-cache-background-tracker.c b/drivers/md/dm-cache-background-tracker.c index 707233891291..1d0af0a21fc7 100644 --- a/drivers/md/dm-cache-background-tracker.c +++ b/drivers/md/dm-cache-background-tracker.c @@ -161,8 +161,17 @@ EXPORT_SYMBOL_GPL(btracker_nr_demotions_queued); static bool max_work_reached(struct background_tracker *b) { - // FIXME: finish - return false; + return atomic_read(&b->pending_promotes) + + atomic_read(&b->pending_writebacks) + + atomic_read(&b->pending_demotes) >= b->max_work; +} + +struct bt_work *alloc_work(struct background_tracker *b) +{ + if (max_work_reached(b)) + return NULL; + + return kmem_cache_alloc(b->work_cache, GFP_NOWAIT); } int btracker_queue(struct background_tracker *b, @@ -174,10 +183,7 @@ int btracker_queue(struct background_tracker *b, if (pwork) *pwork = NULL; - if (max_work_reached(b)) - return -ENOMEM; - - w = kmem_cache_alloc(b->work_cache, GFP_NOWAIT); + w = alloc_work(b); if (!w) return -ENOMEM; diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 4a4e9c75fc4c..0d7212410e21 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -13,6 +13,7 @@ #include "persistent-data/dm-transaction-manager.h" #include <linux/device-mapper.h> +#include <linux/refcount.h> /*----------------------------------------------------------------*/ @@ -100,7 +101,7 @@ struct cache_disk_superblock { } __packed; struct dm_cache_metadata { - atomic_t ref_count; + refcount_t ref_count; struct list_head list; unsigned version; @@ -753,7 +754,7 @@ static struct dm_cache_metadata *metadata_open(struct block_device *bdev, } cmd->version = metadata_version; - atomic_set(&cmd->ref_count, 1); + refcount_set(&cmd->ref_count, 1); init_rwsem(&cmd->root_lock); cmd->bdev = bdev; cmd->data_block_size = data_block_size; @@ -791,7 +792,7 @@ static struct dm_cache_metadata *lookup(struct block_device *bdev) list_for_each_entry(cmd, &table, list) if (cmd->bdev == bdev) { - atomic_inc(&cmd->ref_count); + refcount_inc(&cmd->ref_count); return cmd; } @@ -862,7 +863,7 @@ struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, void dm_cache_metadata_close(struct dm_cache_metadata *cmd) { - if (atomic_dec_and_test(&cmd->ref_count)) { + if (refcount_dec_and_test(&cmd->ref_count)) { mutex_lock(&table_lock); list_del(&cmd->list); mutex_unlock(&table_lock); diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c index e5eb9c9b4bc8..4ab23d0075f6 100644 --- a/drivers/md/dm-cache-policy-smq.c +++ b/drivers/md/dm-cache-policy-smq.c @@ -213,6 +213,19 @@ static void l_del(struct entry_space *es, struct ilist *l, struct entry *e) l->nr_elts--; } +static struct entry *l_pop_head(struct entry_space *es, struct ilist *l) +{ + struct entry *e; + + for (e = l_head(es, l); e; e = l_next(es, e)) + if (!e->sentinel) { + l_del(es, l, e); + return e; + } + + return NULL; +} + static struct entry *l_pop_tail(struct entry_space *es, struct ilist *l) { struct entry *e; @@ -719,7 +732,7 @@ static struct entry *alloc_entry(struct entry_alloc *ea) if (l_empty(&ea->free)) return NULL; - e = l_pop_tail(ea->es, &ea->free); + e = l_pop_head(ea->es, &ea->free); init_entry(e); ea->nr_allocated++; @@ -1158,13 +1171,13 @@ static void clear_pending(struct smq_policy *mq, struct entry *e) e->pending_work = false; } -static void queue_writeback(struct smq_policy *mq) +static void queue_writeback(struct smq_policy *mq, bool idle) { int r; struct policy_work work; struct entry *e; - e = q_peek(&mq->dirty, mq->dirty.nr_levels, !mq->migrations_allowed); + e = q_peek(&mq->dirty, mq->dirty.nr_levels, idle); if (e) { mark_pending(mq, e); q_del(&mq->dirty, e); @@ -1174,12 +1187,16 @@ static void queue_writeback(struct smq_policy *mq) work.cblock = infer_cblock(mq, e); r = btracker_queue(mq->bg_work, &work, NULL); - WARN_ON_ONCE(r); // FIXME: finish, I think we have to get rid of this race. + if (r) { + clear_pending(mq, e); + q_push_front(&mq->dirty, e); + } } } static void queue_demotion(struct smq_policy *mq) { + int r; struct policy_work work; struct entry *e; @@ -1189,7 +1206,7 @@ static void queue_demotion(struct smq_policy *mq) e = q_peek(&mq->clean, mq->clean.nr_levels / 2, true); if (!e) { if (!clean_target_met(mq, true)) - queue_writeback(mq); + queue_writeback(mq, false); return; } @@ -1199,12 +1216,17 @@ static void queue_demotion(struct smq_policy *mq) work.op = POLICY_DEMOTE; work.oblock = e->oblock; work.cblock = infer_cblock(mq, e); - btracker_queue(mq->bg_work, &work, NULL); + r = btracker_queue(mq->bg_work, &work, NULL); + if (r) { + clear_pending(mq, e); + q_push_front(&mq->clean, e); + } } static void queue_promotion(struct smq_policy *mq, dm_oblock_t oblock, struct policy_work **workp) { + int r; struct entry *e; struct policy_work work; @@ -1234,7 +1256,9 @@ static void queue_promotion(struct smq_policy *mq, dm_oblock_t oblock, work.op = POLICY_PROMOTE; work.oblock = oblock; work.cblock = infer_cblock(mq, e); - btracker_queue(mq->bg_work, &work, workp); + r = btracker_queue(mq->bg_work, &work, workp); + if (r) + free_entry(&mq->cache_alloc, e); } /*----------------------------------------------------------------*/ @@ -1418,7 +1442,7 @@ static int smq_get_background_work(struct dm_cache_policy *p, bool idle, r = btracker_issue(mq->bg_work, result); if (r == -ENODATA) { if (!clean_target_met(mq, idle)) { - queue_writeback(mq); + queue_writeback(mq, idle); r = btracker_issue(mq->bg_work, result); } } @@ -1778,7 +1802,7 @@ static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size, mq->next_hotspot_period = jiffies; mq->next_cache_period = jiffies; - mq->bg_work = btracker_create(10240); /* FIXME: hard coded value */ + mq->bg_work = btracker_create(4096); /* FIXME: hard coded value */ if (!mq->bg_work) goto bad_btracker; diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 8785134c9f1f..cf23a14f9c6a 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -408,9 +408,7 @@ struct cache { int sectors_per_block_shift; spinlock_t lock; - struct list_head deferred_cells; struct bio_list deferred_bios; - struct bio_list deferred_writethrough_bios; sector_t migration_threshold; wait_queue_head_t migration_wait; atomic_t nr_allocated_migrations; @@ -446,10 +444,10 @@ struct cache { struct dm_kcopyd_client *copier; struct workqueue_struct *wq; struct work_struct deferred_bio_worker; - struct work_struct deferred_writethrough_worker; struct work_struct migration_worker; struct delayed_work waker; struct dm_bio_prison_v2 *prison; + struct bio_set *bs; mempool_t *migration_pool; @@ -490,15 +488,6 @@ struct per_bio_data { struct dm_bio_prison_cell_v2 *cell; struct dm_hook_info hook_info; sector_t len; - - /* - * writethrough fields. These MUST remain at the end of this - * structure and the 'cache' member must be the first as it - * is used to determine the offset of the writethrough fields. - */ - struct cache *cache; - dm_cblock_t cblock; - struct dm_bio_details bio_details; }; struct dm_cache_migration { @@ -515,19 +504,19 @@ struct dm_cache_migration { /*----------------------------------------------------------------*/ -static bool writethrough_mode(struct cache_features *f) +static bool writethrough_mode(struct cache *cache) { - return f->io_mode == CM_IO_WRITETHROUGH; + return cache->features.io_mode == CM_IO_WRITETHROUGH; } -static bool writeback_mode(struct cache_features *f) +static bool writeback_mode(struct cache *cache) { - return f->io_mode == CM_IO_WRITEBACK; + return cache->features.io_mode == CM_IO_WRITEBACK; } -static inline bool passthrough_mode(struct cache_features *f) +static inline bool passthrough_mode(struct cache *cache) { - return unlikely(f->io_mode == CM_IO_PASSTHROUGH); + return unlikely(cache->features.io_mode == CM_IO_PASSTHROUGH); } /*----------------------------------------------------------------*/ @@ -537,14 +526,9 @@ static void wake_deferred_bio_worker(struct cache *cache) queue_work(cache->wq, &cache->deferred_bio_worker); } -static void wake_deferred_writethrough_worker(struct cache *cache) -{ - queue_work(cache->wq, &cache->deferred_writethrough_worker); -} - static void wake_migration_worker(struct cache *cache) { - if (passthrough_mode(&cache->features)) + if (passthrough_mode(cache)) return; queue_work(cache->wq, &cache->migration_worker); @@ -567,10 +551,13 @@ static struct dm_cache_migration *alloc_migration(struct cache *cache) struct dm_cache_migration *mg; mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT); - if (mg) { - mg->cache = cache; - atomic_inc(&mg->cache->nr_allocated_migrations); - } + if (!mg) + return NULL; + + memset(mg, 0, sizeof(*mg)); + + mg->cache = cache; + atomic_inc(&cache->nr_allocated_migrations); return mg; } @@ -618,27 +605,16 @@ static unsigned lock_level(struct bio *bio) * Per bio data *--------------------------------------------------------------*/ -/* - * If using writeback, leave out struct per_bio_data's writethrough fields. - */ -#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache)) -#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data)) - -static size_t get_per_bio_data_size(struct cache *cache) +static struct per_bio_data *get_per_bio_data(struct bio *bio) { - return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB; -} - -static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size) -{ - struct per_bio_data *pb = dm_per_bio_data(bio, data_size); + struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); BUG_ON(!pb); return pb; } -static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size) +static struct per_bio_data *init_per_bio_data(struct bio *bio) { - struct per_bio_data *pb = get_per_bio_data(bio, data_size); + struct per_bio_data *pb = get_per_bio_data(bio); pb->tick = false; pb->req_nr = dm_bio_get_target_bio_nr(bio); @@ -678,7 +654,6 @@ static void defer_bios(struct cache *cache, struct bio_list *bios) static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bio *bio) { bool r; - size_t pb_size; struct per_bio_data *pb; struct dm_cell_key_v2 key; dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL); @@ -703,8 +678,7 @@ static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bi if (cell != cell_prealloc) free_prison_cell(cache, cell_prealloc); - pb_size = get_per_bio_data_size(cache); - pb = get_per_bio_data(bio, pb_size); + pb = get_per_bio_data(bio); pb->cell = cell; return r; @@ -856,28 +830,35 @@ static void remap_to_cache(struct cache *cache, struct bio *bio, static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio) { unsigned long flags; - size_t pb_data_size = get_per_bio_data_size(cache); - struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); + struct per_bio_data *pb; spin_lock_irqsave(&cache->lock, flags); if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) && bio_op(bio) != REQ_OP_DISCARD) { + pb = get_per_bio_data(bio); pb->tick = true; cache->need_tick_bio = false; } spin_unlock_irqrestore(&cache->lock, flags); } -static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio, - dm_oblock_t oblock) +static void __remap_to_origin_clear_discard(struct cache *cache, struct bio *bio, + dm_oblock_t oblock, bool bio_has_pbd) { - // FIXME: this is called way too much. - check_if_tick_bio_needed(cache, bio); + if (bio_has_pbd) + check_if_tick_bio_needed(cache, bio); remap_to_origin(cache, bio); if (bio_data_dir(bio) == WRITE) clear_discard(cache, oblock_to_dblock(cache, oblock)); } +static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio, + dm_oblock_t oblock) +{ + // FIXME: check_if_tick_bio_needed() is called way too much through this interface + __remap_to_origin_clear_discard(cache, bio, oblock, true); +} + static void remap_to_cache_dirty(struct cache *cache, struct bio *bio, dm_oblock_t oblock, dm_cblock_t cblock) { @@ -908,10 +889,10 @@ static bool accountable_bio(struct cache *cache, struct bio *bio) static void accounted_begin(struct cache *cache, struct bio *bio) { - size_t pb_data_size = get_per_bio_data_size(cache); - struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); + struct per_bio_data *pb; if (accountable_bio(cache, bio)) { + pb = get_per_bio_data(bio); pb->len = bio_sectors(bio); iot_io_begin(&cache->tracker, pb->len); } @@ -919,8 +900,7 @@ static void accounted_begin(struct cache *cache, struct bio *bio) static void accounted_complete(struct cache *cache, struct bio *bio) { - size_t pb_data_size = get_per_bio_data_size(cache); - struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); + struct per_bio_data *pb = get_per_bio_data(bio); iot_io_end(&cache->tracker, pb->len); } @@ -937,57 +917,26 @@ static void issue_op(struct bio *bio, void *context) accounted_request(cache, bio); } -static void defer_writethrough_bio(struct cache *cache, struct bio *bio) -{ - unsigned long flags; - - spin_lock_irqsave(&cache->lock, flags); - bio_list_add(&cache->deferred_writethrough_bios, bio); - spin_unlock_irqrestore(&cache->lock, flags); - - wake_deferred_writethrough_worker(cache); -} - -static void writethrough_endio(struct bio *bio) -{ - struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT); - - dm_unhook_bio(&pb->hook_info, bio); - - if (bio->bi_status) { - bio_endio(bio); - return; - } - - dm_bio_restore(&pb->bio_details, bio); - remap_to_cache(pb->cache, bio, pb->cblock); - - /* - * We can't issue this bio directly, since we're in interrupt - * context. So it gets put on a bio list for processing by the - * worker thread. - */ - defer_writethrough_bio(pb->cache, bio); -} - /* - * FIXME: send in parallel, huge latency as is. * When running in writethrough mode we need to send writes to clean blocks - * to both the cache and origin devices. In future we'd like to clone the - * bio and send them in parallel, but for now we're doing them in - * series as this is easier. + * to both the cache and origin devices. Clone the bio and send them in parallel. */ -static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio, - dm_oblock_t oblock, dm_cblock_t cblock) +static void remap_to_origin_and_cache(struct cache *cache, struct bio *bio, + dm_oblock_t oblock, dm_cblock_t cblock) { - struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT); + struct bio *origin_bio = bio_clone_fast(bio, GFP_NOIO, cache->bs); + + BUG_ON(!origin_bio); - pb->cache = cache; - pb->cblock = cblock; - dm_hook_bio(&pb->hook_info, bio, writethrough_endio, NULL); - dm_bio_record(&pb->bio_details, bio); + bio_chain(origin_bio, bio); + /* + * Passing false to __remap_to_origin_clear_discard() skips + * all code that might use per_bio_data (since clone doesn't have it) + */ + __remap_to_origin_clear_discard(cache, origin_bio, oblock, false); + submit_bio(origin_bio); - remap_to_origin_clear_discard(pb->cache, bio, oblock); + remap_to_cache(cache, bio, cblock); } /*---------------------------------------------------------------- @@ -1201,6 +1150,18 @@ static void background_work_end(struct cache *cache) /*----------------------------------------------------------------*/ +static bool bio_writes_complete_block(struct cache *cache, struct bio *bio) +{ + return (bio_data_dir(bio) == WRITE) && + (bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT)); +} + +static bool optimisable_bio(struct cache *cache, struct bio *bio, dm_oblock_t block) +{ + return writeback_mode(cache) && + (is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio)); +} + static void quiesce(struct dm_cache_migration *mg, void (*continuation)(struct work_struct *)) { @@ -1248,8 +1209,7 @@ static int copy(struct dm_cache_migration *mg, bool promote) static void bio_drop_shared_lock(struct cache *cache, struct bio *bio) { - size_t pb_data_size = get_per_bio_data_size(cache); - struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); + struct per_bio_data *pb = get_per_bio_data(bio); if (pb->cell && dm_cell_put_v2(cache->prison, pb->cell)) free_prison_cell(cache, pb->cell); @@ -1260,23 +1220,21 @@ static void overwrite_endio(struct bio *bio) { struct dm_cache_migration *mg = bio->bi_private; struct cache *cache = mg->cache; - size_t pb_data_size = get_per_bio_data_size(cache); - struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); + struct per_bio_data *pb = get_per_bio_data(bio); dm_unhook_bio(&pb->hook_info, bio); if (bio->bi_status) mg->k.input = bio->bi_status; - queue_continuation(mg->cache->wq, &mg->k); + queue_continuation(cache->wq, &mg->k); } static void overwrite(struct dm_cache_migration *mg, void (*continuation)(struct work_struct *)) { struct bio *bio = mg->overwrite_bio; - size_t pb_data_size = get_per_bio_data_size(mg->cache); - struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); + struct per_bio_data *pb = get_per_bio_data(bio); dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg); @@ -1474,13 +1432,51 @@ static void mg_upgrade_lock(struct work_struct *ws) } } +static void mg_full_copy(struct work_struct *ws) +{ + struct dm_cache_migration *mg = ws_to_mg(ws); + struct cache *cache = mg->cache; + struct policy_work *op = mg->op; + bool is_policy_promote = (op->op == POLICY_PROMOTE); + + if ((!is_policy_promote && !is_dirty(cache, op->cblock)) || + is_discarded_oblock(cache, op->oblock)) { + mg_upgrade_lock(ws); + return; + } + + init_continuation(&mg->k, mg_upgrade_lock); + + if (copy(mg, is_policy_promote)) { + DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache)); + mg->k.input = BLK_STS_IOERR; + mg_complete(mg, false); + } +} + static void mg_copy(struct work_struct *ws) { - int r; struct dm_cache_migration *mg = ws_to_mg(ws); if (mg->overwrite_bio) { /* + * No exclusive lock was held when we last checked if the bio + * was optimisable. So we have to check again in case things + * have changed (eg, the block may no longer be discarded). + */ + if (!optimisable_bio(mg->cache, mg->overwrite_bio, mg->op->oblock)) { + /* + * Fallback to a real full copy after doing some tidying up. + */ + bool rb = bio_detain_shared(mg->cache, mg->op->oblock, mg->overwrite_bio); + BUG_ON(rb); /* An exclussive lock must _not_ be held for this block */ + mg->overwrite_bio = NULL; + inc_io_migrations(mg->cache); + mg_full_copy(ws); + return; + } + + /* * It's safe to do this here, even though it's new data * because all IO has been locked out of the block. * @@ -1489,26 +1485,8 @@ static void mg_copy(struct work_struct *ws) */ overwrite(mg, mg_update_metadata_after_copy); - } else { - struct cache *cache = mg->cache; - struct policy_work *op = mg->op; - bool is_policy_promote = (op->op == POLICY_PROMOTE); - - if ((!is_policy_promote && !is_dirty(cache, op->cblock)) || - is_discarded_oblock(cache, op->oblock)) { - mg_upgrade_lock(ws); - return; - } - - init_continuation(&mg->k, mg_upgrade_lock); - - r = copy(mg, is_policy_promote); - if (r) { - DMERR_LIMIT("%s: migration copy failed", cache_device_name(cache)); - mg->k.input = BLK_STS_IOERR; - mg_complete(mg, false); - } - } + } else + mg_full_copy(ws); } static int mg_lock_writes(struct dm_cache_migration *mg) @@ -1567,9 +1545,6 @@ static int mg_start(struct cache *cache, struct policy_work *op, struct bio *bio return -ENOMEM; } - memset(mg, 0, sizeof(*mg)); - - mg->cache = cache; mg->op = op; mg->overwrite_bio = bio; @@ -1703,9 +1678,6 @@ static int invalidate_start(struct cache *cache, dm_cblock_t cblock, return -ENOMEM; } - memset(mg, 0, sizeof(*mg)); - - mg->cache = cache; mg->overwrite_bio = bio; mg->invalidate_cblock = cblock; mg->invalidate_oblock = oblock; @@ -1748,26 +1720,12 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio) /*----------------------------------------------------------------*/ -static bool bio_writes_complete_block(struct cache *cache, struct bio *bio) -{ - return (bio_data_dir(bio) == WRITE) && - (bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT)); -} - -static bool optimisable_bio(struct cache *cache, struct bio *bio, dm_oblock_t block) -{ - return writeback_mode(&cache->features) && - (is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio)); -} - static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block, bool *commit_needed) { int r, data_dir; bool rb, background_queued; dm_cblock_t cblock; - size_t pb_data_size = get_per_bio_data_size(cache); - struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); *commit_needed = false; @@ -1816,6 +1774,8 @@ static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block, } if (r == -ENOENT) { + struct per_bio_data *pb = get_per_bio_data(bio); + /* * Miss. */ @@ -1823,7 +1783,6 @@ static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block, if (pb->req_nr == 0) { accounted_begin(cache, bio); remap_to_origin_clear_discard(cache, bio, block); - } else { /* * This is a duplicate writethrough io that is no @@ -1842,18 +1801,17 @@ static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block, * Passthrough always maps to the origin, invalidating any * cache blocks that are written to. */ - if (passthrough_mode(&cache->features)) { + if (passthrough_mode(cache)) { if (bio_data_dir(bio) == WRITE) { bio_drop_shared_lock(cache, bio); atomic_inc(&cache->stats.demotion); invalidate_start(cache, cblock, block, bio); } else remap_to_origin_clear_discard(cache, bio, block); - } else { - if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) && + if (bio_data_dir(bio) == WRITE && writethrough_mode(cache) && !is_dirty(cache, cblock)) { - remap_to_origin_then_cache(cache, bio, block, cblock); + remap_to_origin_and_cache(cache, bio, block, cblock); accounted_begin(cache, bio); } else remap_to_cache_dirty(cache, bio, block, cblock); @@ -1922,8 +1880,7 @@ static blk_status_t commit_op(void *context) static bool process_flush_bio(struct cache *cache, struct bio *bio) { - size_t pb_data_size = get_per_bio_data_size(cache); - struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); + struct per_bio_data *pb = get_per_bio_data(bio); if (!pb->req_nr) remap_to_origin(cache, bio); @@ -1983,28 +1940,6 @@ static void process_deferred_bios(struct work_struct *ws) schedule_commit(&cache->committer); } -static void process_deferred_writethrough_bios(struct work_struct *ws) -{ - struct cache *cache = container_of(ws, struct cache, deferred_writethrough_worker); - - unsigned long flags; - struct bio_list bios; - struct bio *bio; - - bio_list_init(&bios); - - spin_lock_irqsave(&cache->lock, flags); - bio_list_merge(&bios, &cache->deferred_writethrough_bios); - bio_list_init(&cache->deferred_writethrough_bios); - spin_unlock_irqrestore(&cache->lock, flags); - - /* - * These bios have already been through accounted_begin() - */ - while ((bio = bio_list_pop(&bios))) - generic_make_request(bio); -} - /*---------------------------------------------------------------- * Main worker loop *--------------------------------------------------------------*/ @@ -2112,6 +2047,9 @@ static void destroy(struct cache *cache) kfree(cache->ctr_args[i]); kfree(cache->ctr_args); + if (cache->bs) + bioset_free(cache->bs); + kfree(cache); } @@ -2555,8 +2493,15 @@ static int cache_create(struct cache_args *ca, struct cache **result) ti->discards_supported = true; ti->split_discard_bios = false; + ti->per_io_data_size = sizeof(struct per_bio_data); + cache->features = ca->features; - ti->per_io_data_size = get_per_bio_data_size(cache); + if (writethrough_mode(cache)) { + /* Create bioset for writethrough bios issued to origin */ + cache->bs = bioset_create(BIO_POOL_SIZE, 0, 0); + if (!cache->bs) + goto bad; + } cache->callbacks.congested_fn = cache_is_congested; dm_table_add_target_callbacks(ti->table, &cache->callbacks); @@ -2618,7 +2563,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) goto bad; } - if (passthrough_mode(&cache->features)) { + if (passthrough_mode(cache)) { bool all_clean; r = dm_cache_metadata_all_clean(cache->cmd, &all_clean); @@ -2637,9 +2582,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) } spin_lock_init(&cache->lock); - INIT_LIST_HEAD(&cache->deferred_cells); bio_list_init(&cache->deferred_bios); - bio_list_init(&cache->deferred_writethrough_bios); atomic_set(&cache->nr_allocated_migrations, 0); atomic_set(&cache->nr_io_migrations, 0); init_waitqueue_head(&cache->migration_wait); @@ -2678,8 +2621,6 @@ static int cache_create(struct cache_args *ca, struct cache **result) goto bad; } INIT_WORK(&cache->deferred_bio_worker, process_deferred_bios); - INIT_WORK(&cache->deferred_writethrough_worker, - process_deferred_writethrough_bios); INIT_WORK(&cache->migration_worker, check_migrations); INIT_DELAYED_WORK(&cache->waker, do_waker); @@ -2795,9 +2736,8 @@ static int cache_map(struct dm_target *ti, struct bio *bio) int r; bool commit_needed; dm_oblock_t block = get_bio_block(cache, bio); - size_t pb_data_size = get_per_bio_data_size(cache); - init_per_bio_data(bio, pb_data_size); + init_per_bio_data(bio); if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) { /* * This can only occur if the io goes to a partial block at @@ -2821,13 +2761,11 @@ static int cache_map(struct dm_target *ti, struct bio *bio) return r; } -static int cache_end_io(struct dm_target *ti, struct bio *bio, - blk_status_t *error) +static int cache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error) { struct cache *cache = ti->private; unsigned long flags; - size_t pb_data_size = get_per_bio_data_size(cache); - struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); + struct per_bio_data *pb = get_per_bio_data(bio); if (pb->tick) { policy_tick(cache->policy, false); @@ -3243,13 +3181,13 @@ static void cache_status(struct dm_target *ti, status_type_t type, else DMEMIT("1 "); - if (writethrough_mode(&cache->features)) + if (writethrough_mode(cache)) DMEMIT("writethrough "); - else if (passthrough_mode(&cache->features)) + else if (passthrough_mode(cache)) DMEMIT("passthrough "); - else if (writeback_mode(&cache->features)) + else if (writeback_mode(cache)) DMEMIT("writeback "); else { @@ -3415,7 +3353,7 @@ static int process_invalidate_cblocks_message(struct cache *cache, unsigned coun unsigned i; struct cblock_range range; - if (!passthrough_mode(&cache->features)) { + if (!passthrough_mode(cache)) { DMERR("%s: cache has to be in passthrough mode for invalidation", cache_device_name(cache)); return -EPERM; diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 203144762f36..6a14f945783c 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -29,7 +29,6 @@ struct dm_kobject_holder { * DM targets must _not_ deference a mapped_device to directly access its members! */ struct mapped_device { - struct srcu_struct io_barrier; struct mutex suspend_lock; /* @@ -127,6 +126,8 @@ struct mapped_device { struct blk_mq_tag_set *tag_set; bool use_blk_mq:1; bool init_tio_pdu:1; + + struct srcu_struct io_barrier; }; void dm_init_md_queue(struct mapped_device *md); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 96ab46512e1f..9fc12f556534 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1075,7 +1075,7 @@ static int crypt_convert_block_aead(struct crypt_config *cc, BUG_ON(cc->integrity_iv_size && cc->integrity_iv_size != cc->iv_size); /* Reject unexpected unaligned bio. */ - if (unlikely(bv_in.bv_offset & (cc->sector_size - 1))) + if (unlikely(bv_in.bv_len & (cc->sector_size - 1))) return -EIO; dmreq = dmreq_of_req(cc, req); @@ -1168,7 +1168,7 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc, int r = 0; /* Reject unexpected unaligned bio. */ - if (unlikely(bv_in.bv_offset & (cc->sector_size - 1))) + if (unlikely(bv_in.bv_len & (cc->sector_size - 1))) return -EIO; dmreq = dmreq_of_req(cc, req); diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 8c5756e1df94..61180783ef42 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1377,7 +1377,7 @@ static int dm_integrity_map(struct dm_target *ti, struct bio *bio) struct bvec_iter iter; struct bio_vec bv; bio_for_each_segment(bv, bio, iter) { - if (unlikely((bv.bv_offset | bv.bv_len) & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) { + if (unlikely(bv.bv_len & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) { DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary", bv.bv_offset, bv.bv_len, ic->sectors_per_block); return DM_MAPIO_KILL; diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 8b80a9ce9ea9..189badbeddaf 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -10,9 +10,11 @@ #include <linux/init.h> #include <linux/blkdev.h> #include <linux/bio.h> +#include <linux/dax.h> #include <linux/slab.h> #include <linux/kthread.h> #include <linux/freezer.h> +#include <linux/uio.h> #define DM_MSG_PREFIX "log-writes" @@ -246,27 +248,108 @@ error: return -1; } +static int write_inline_data(struct log_writes_c *lc, void *entry, + size_t entrylen, void *data, size_t datalen, + sector_t sector) +{ + int num_pages, bio_pages, pg_datalen, pg_sectorlen, i; + struct page *page; + struct bio *bio; + size_t ret; + void *ptr; + + while (datalen) { + num_pages = ALIGN(datalen, PAGE_SIZE) >> PAGE_SHIFT; + bio_pages = min(num_pages, BIO_MAX_PAGES); + + atomic_inc(&lc->io_blocks); + + bio = bio_alloc(GFP_KERNEL, bio_pages); + if (!bio) { + DMERR("Couldn't alloc inline data bio"); + goto error; + } + + bio->bi_iter.bi_size = 0; + bio->bi_iter.bi_sector = sector; + bio_set_dev(bio, lc->logdev->bdev); + bio->bi_end_io = log_end_io; + bio->bi_private = lc; + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + + for (i = 0; i < bio_pages; i++) { + pg_datalen = min_t(int, datalen, PAGE_SIZE); + pg_sectorlen = ALIGN(pg_datalen, lc->sectorsize); + + page = alloc_page(GFP_KERNEL); + if (!page) { + DMERR("Couldn't alloc inline data page"); + goto error_bio; + } + + ptr = kmap_atomic(page); + memcpy(ptr, data, pg_datalen); + if (pg_sectorlen > pg_datalen) + memset(ptr + pg_datalen, 0, pg_sectorlen - pg_datalen); + kunmap_atomic(ptr); + + ret = bio_add_page(bio, page, pg_sectorlen, 0); + if (ret != pg_sectorlen) { + DMERR("Couldn't add page of inline data"); + __free_page(page); + goto error_bio; + } + + datalen -= pg_datalen; + data += pg_datalen; + } + submit_bio(bio); + + sector += bio_pages * PAGE_SECTORS; + } + return 0; +error_bio: + bio_free_pages(bio); + bio_put(bio); +error: + put_io_block(lc); + return -1; +} + static int log_one_block(struct log_writes_c *lc, struct pending_block *block, sector_t sector) { struct bio *bio; struct log_write_entry entry; - size_t ret; + size_t metadatalen, ret; int i; entry.sector = cpu_to_le64(block->sector); entry.nr_sectors = cpu_to_le64(block->nr_sectors); entry.flags = cpu_to_le64(block->flags); entry.data_len = cpu_to_le64(block->datalen); + + metadatalen = (block->flags & LOG_MARK_FLAG) ? block->datalen : 0; if (write_metadata(lc, &entry, sizeof(entry), block->data, - block->datalen, sector)) { + metadatalen, sector)) { free_pending_block(lc, block); return -1; } + sector += dev_to_bio_sectors(lc, 1); + + if (block->datalen && metadatalen == 0) { + if (write_inline_data(lc, &entry, sizeof(entry), block->data, + block->datalen, sector)) { + free_pending_block(lc, block); + return -1; + } + /* we don't support both inline data & bio data */ + goto out; + } + if (!block->vec_cnt) goto out; - sector += dev_to_bio_sectors(lc, 1); atomic_inc(&lc->io_blocks); bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt, BIO_MAX_PAGES)); @@ -527,6 +610,51 @@ static int log_mark(struct log_writes_c *lc, char *data) return 0; } +static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes, + struct iov_iter *i) +{ + struct pending_block *block; + + if (!bytes) + return 0; + + block = kzalloc(sizeof(struct pending_block), GFP_KERNEL); + if (!block) { + DMERR("Error allocating dax pending block"); + return -ENOMEM; + } + + block->data = kzalloc(bytes, GFP_KERNEL); + if (!block->data) { + DMERR("Error allocating dax data space"); + kfree(block); + return -ENOMEM; + } + + /* write data provided via the iterator */ + if (!copy_from_iter(block->data, bytes, i)) { + DMERR("Error copying dax data"); + kfree(block->data); + kfree(block); + return -EIO; + } + + /* rewind the iterator so that the block driver can use it */ + iov_iter_revert(i, bytes); + + block->datalen = bytes; + block->sector = bio_to_dev_sectors(lc, sector); + block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift; + + atomic_inc(&lc->pending_blocks); + spin_lock_irq(&lc->blocks_lock); + list_add_tail(&block->list, &lc->unflushed_blocks); + spin_unlock_irq(&lc->blocks_lock); + wake_up_process(lc->log_kthread); + + return 0; +} + static void log_writes_dtr(struct dm_target *ti) { struct log_writes_c *lc = ti->private; @@ -792,9 +920,46 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit limits->io_min = limits->physical_block_size; } +static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, + long nr_pages, void **kaddr, pfn_t *pfn) +{ + struct log_writes_c *lc = ti->private; + sector_t sector = pgoff * PAGE_SECTORS; + int ret; + + ret = bdev_dax_pgoff(lc->dev->bdev, sector, nr_pages * PAGE_SIZE, &pgoff); + if (ret) + return ret; + return dax_direct_access(lc->dev->dax_dev, pgoff, nr_pages, kaddr, pfn); +} + +static size_t log_writes_dax_copy_from_iter(struct dm_target *ti, + pgoff_t pgoff, void *addr, size_t bytes, + struct iov_iter *i) +{ + struct log_writes_c *lc = ti->private; + sector_t sector = pgoff * PAGE_SECTORS; + int err; + + if (bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(bytes, PAGE_SIZE), &pgoff)) + return 0; + + /* Don't bother doing anything if logging has been disabled */ + if (!lc->logging_enabled) + goto dax_copy; + + err = log_dax(lc, sector, bytes, i); + if (err) { + DMWARN("Error %d logging DAX write", err); + return 0; + } +dax_copy: + return dax_copy_from_iter(lc->dev->dax_dev, pgoff, addr, bytes, i); +} + static struct target_type log_writes_target = { .name = "log-writes", - .version = {1, 0, 0}, + .version = {1, 1, 0}, .module = THIS_MODULE, .ctr = log_writes_ctr, .dtr = log_writes_dtr, @@ -805,6 +970,8 @@ static struct target_type log_writes_target = { .message = log_writes_message, .iterate_devices = log_writes_iterate_devices, .io_hints = log_writes_io_hints, + .direct_access = log_writes_dax_direct_access, + .dax_copy_from_iter = log_writes_dax_copy_from_iter, }; static int __init dm_log_writes_init(void) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 2245d06d2045..a25eebd98996 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2143,13 +2143,6 @@ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev) struct dm_raid_superblock *refsb; uint64_t events_sb, events_refsb; - rdev->sb_start = 0; - rdev->sb_size = bdev_logical_block_size(rdev->meta_bdev); - if (rdev->sb_size < sizeof(*sb) || rdev->sb_size > PAGE_SIZE) { - DMERR("superblock size of a logical block is no longer valid"); - return -EINVAL; - } - r = read_disk_sb(rdev, rdev->sb_size, false); if (r) return r; @@ -2494,6 +2487,17 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) if (test_bit(Journal, &rdev->flags)) continue; + if (!rdev->meta_bdev) + continue; + + /* Set superblock offset/size for metadata device. */ + rdev->sb_start = 0; + rdev->sb_size = bdev_logical_block_size(rdev->meta_bdev); + if (rdev->sb_size < sizeof(struct dm_raid_superblock) || rdev->sb_size > PAGE_SIZE) { + DMERR("superblock size of a logical block is no longer valid"); + return -EINVAL; + } + /* * Skipping super_load due to CTR_FLAG_SYNC will cause * the array to undergo initialization again as @@ -2506,9 +2510,6 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags)) continue; - if (!rdev->meta_bdev) - continue; - r = super_load(rdev, freshest); switch (r) { diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 75281828f2cb..433bd3f3014c 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -451,15 +451,15 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, return r; } - atomic_set(&dd->count, 0); + refcount_set(&dd->count, 1); list_add(&dd->list, &t->devices); } else if (dd->dm_dev->mode != (mode | dd->dm_dev->mode)) { r = upgrade_mode(dd, mode, t->md); if (r) return r; + refcount_inc(&dd->count); } - atomic_inc(&dd->count); *result = dd->dm_dev; return 0; @@ -515,7 +515,7 @@ void dm_put_device(struct dm_target *ti, struct dm_dev *d) dm_device_name(ti->table->md), d->name); return; } - if (atomic_dec_and_test(&dd->count)) { + if (refcount_dec_and_test(&dd->count)) { dm_put_table_device(ti->table->md, d); list_del(&dd->list); kfree(dd); diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index b87c1741da4b..6d7bda6f8190 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -660,6 +660,7 @@ static int dmz_get_zoned_device(struct dm_target *ti, char *path) struct dmz_target *dmz = ti->private; struct request_queue *q; struct dmz_dev *dev; + sector_t aligned_capacity; int ret; /* Get the target device */ @@ -685,15 +686,17 @@ static int dmz_get_zoned_device(struct dm_target *ti, char *path) goto err; } + q = bdev_get_queue(dev->bdev); dev->capacity = i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT; - if (ti->begin || (ti->len != dev->capacity)) { + aligned_capacity = dev->capacity & ~(blk_queue_zone_sectors(q) - 1); + if (ti->begin || + ((ti->len != dev->capacity) && (ti->len != aligned_capacity))) { ti->error = "Partial mapping not supported"; ret = -EINVAL; goto err; } - q = bdev_get_queue(dev->bdev); - dev->zone_nr_sectors = q->limits.chunk_sectors; + dev->zone_nr_sectors = blk_queue_zone_sectors(q); dev->zone_nr_sectors_shift = ilog2(dev->zone_nr_sectors); dev->zone_nr_blocks = dmz_sect2blk(dev->zone_nr_sectors); @@ -929,8 +932,10 @@ static int dmz_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) { struct dmz_target *dmz = ti->private; + struct dmz_dev *dev = dmz->dev; + sector_t capacity = dev->capacity & ~(dev->zone_nr_sectors - 1); - return fn(ti, dmz->ddev, 0, dmz->dev->capacity, data); + return fn(ti, dmz->ddev, 0, capacity, data); } static struct target_type dmz_type = { diff --git a/drivers/md/dm.c b/drivers/md/dm.c index a3f8cbb98dd5..de17b7193299 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -24,6 +24,7 @@ #include <linux/delay.h> #include <linux/wait.h> #include <linux/pr.h> +#include <linux/refcount.h> #define DM_MSG_PREFIX "core" @@ -98,7 +99,7 @@ struct dm_md_mempools { struct table_device { struct list_head list; - atomic_t count; + refcount_t count; struct dm_dev dm_dev; }; @@ -685,10 +686,11 @@ int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode, format_dev_t(td->dm_dev.name, dev); - atomic_set(&td->count, 0); + refcount_set(&td->count, 1); list_add(&td->list, &md->table_devices); + } else { + refcount_inc(&td->count); } - atomic_inc(&td->count); mutex_unlock(&md->table_devices_lock); *result = &td->dm_dev; @@ -701,7 +703,7 @@ void dm_put_table_device(struct mapped_device *md, struct dm_dev *d) struct table_device *td = container_of(d, struct table_device, dm_dev); mutex_lock(&md->table_devices_lock); - if (atomic_dec_and_test(&td->count)) { + if (refcount_dec_and_test(&td->count)) { close_table_device(td, md); list_del(&td->list); kfree(td); @@ -718,7 +720,7 @@ static void free_table_devices(struct list_head *devices) struct table_device *td = list_entry(tmp, struct table_device, list); DMWARN("dm_destroy: %s still exists with %d references", - td->dm_dev.name, atomic_read(&td->count)); + td->dm_dev.name, refcount_read(&td->count)); kfree(td); } } @@ -1684,7 +1686,7 @@ static struct mapped_device *alloc_dev(int minor) struct mapped_device *md; void *old_md; - md = kzalloc_node(sizeof(*md), GFP_KERNEL, numa_node_id); + md = kvzalloc_node(sizeof(*md), GFP_KERNEL, numa_node_id); if (!md) { DMWARN("unable to allocate device, out of memory."); return NULL; @@ -1784,7 +1786,7 @@ bad_io_barrier: bad_minor: module_put(THIS_MODULE); bad_module_get: - kfree(md); + kvfree(md); return NULL; } @@ -1803,7 +1805,7 @@ static void free_dev(struct mapped_device *md) free_minor(minor); module_put(THIS_MODULE); - kfree(md); + kvfree(md); } static void __bind_mempools(struct mapped_device *md, struct dm_table *t) @@ -2061,17 +2063,12 @@ struct mapped_device *dm_get_md(dev_t dev) spin_lock(&_minor_lock); md = idr_find(&_minor_idr, minor); - if (md) { - if ((md == MINOR_ALLOCED || - (MINOR(disk_devt(dm_disk(md))) != minor) || - dm_deleting_md(md) || - test_bit(DMF_FREEING, &md->flags))) { - md = NULL; - goto out; - } - dm_get(md); + if (!md || md == MINOR_ALLOCED || (MINOR(disk_devt(dm_disk(md))) != minor) || + test_bit(DMF_FREEING, &md->flags) || dm_deleting_md(md)) { + md = NULL; + goto out; } - + dm_get(md); out: spin_unlock(&_minor_lock); @@ -2698,11 +2695,15 @@ struct mapped_device *dm_get_from_kobject(struct kobject *kobj) md = container_of(kobj, struct mapped_device, kobj_holder.kobj); - if (test_bit(DMF_FREEING, &md->flags) || - dm_deleting_md(md)) - return NULL; - + spin_lock(&_minor_lock); + if (test_bit(DMF_FREEING, &md->flags) || dm_deleting_md(md)) { + md = NULL; + goto out; + } dm_get(md); +out: + spin_unlock(&_minor_lock); + return md; } diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 38c84c0a35d4..36399bb875dd 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -19,6 +19,7 @@ #include <linux/hdreg.h> #include <linux/completion.h> #include <linux/kobject.h> +#include <linux/refcount.h> #include "dm-stats.h" @@ -38,7 +39,7 @@ */ struct dm_dev_internal { struct list_head list; - atomic_t count; + refcount_t count; struct dm_dev *dm_dev; }; diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 4aed69d9dd17..aec449243966 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -11,6 +11,7 @@ #include <linux/list.h> #include <linux/slab.h> #include <linux/device-mapper.h> +#include <linux/kernel.h> #define DM_MSG_PREFIX "space map metadata" @@ -111,7 +112,7 @@ static bool brb_empty(struct bop_ring_buffer *brb) static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old) { unsigned r = old + 1; - return (r >= (sizeof(brb->bops) / sizeof(*brb->bops))) ? 0 : r; + return r >= ARRAY_SIZE(brb->bops) ? 0 : r; } static int brb_push(struct bop_ring_buffer *brb, |