summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-03-10 08:45:44 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2018-03-10 08:45:44 -0800
commitb3b25b1d9e104352b8272488ab94145fe84c4261 (patch)
tree0a09b1e8670c1d23527d876b1fd8c360d83a1869
parent2f64e70cd0fc5db1d2e41dac1fc668951840f9ed (diff)
parentc934edadcc7a64e399942ae34b912939057a77a7 (diff)
downloadlwn-b3b25b1d9e104352b8272488ab94145fe84c4261.tar.gz
lwn-b3b25b1d9e104352b8272488ab94145fe84c4261.zip
Merge tag 'for-4.16/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper fixes from Mike Snitzer: - Fix an uninitialized variable false warning in dm bufio - Fix DM's passthrough ioctl support to be race free against an underlying device being removed. - Fix corner-case of DM raid resync reporting if/when the raid becomes degraded during resync; otherwise automated raid repair will fail. - A few DM multipath fixes to make non-SCSI optimizations, that were introduced during the 4.16 merge, useful for all non-SCSI devices, rather than narrowly define this non-SCSI mode in terms of "nvme". This allows the removal of "queue_mode nvme" that really didn't need to be introduced. Instead DM core will internalize whether nvme-specific IO submission optimizations are doable and DM multipath will only do SCSI-specific device handler operations if SCSI is in use. * tag 'for-4.16/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: dm table: allow upgrade from bio-based to specialized bio-based variant dm mpath: remove unnecessary NVMe branching in favor of scsi_dh checks dm table: fix "nvme" test dm raid: fix incorrect sync_ratio when degraded dm: use blkdev_get rather than bdgrab when issuing pass-through ioctl dm bufio: avoid false-positive Wmaybe-uninitialized warning
-rw-r--r--drivers/md/dm-bufio.c16
-rw-r--r--drivers/md/dm-mpath.c66
-rw-r--r--drivers/md/dm-raid.c7
-rw-r--r--drivers/md/dm-table.c16
-rw-r--r--drivers/md/dm.c35
5 files changed, 65 insertions, 75 deletions
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 414c9af54ded..aa2032fa80d4 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -386,9 +386,6 @@ static void __cache_size_refresh(void)
static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
enum data_mode *data_mode)
{
- unsigned noio_flag;
- void *ptr;
-
if (c->block_size <= DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT) {
*data_mode = DATA_MODE_SLAB;
return kmem_cache_alloc(DM_BUFIO_CACHE(c), gfp_mask);
@@ -412,16 +409,15 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
* all allocations done by this process (including pagetables) are done
* as if GFP_NOIO was specified.
*/
+ if (gfp_mask & __GFP_NORETRY) {
+ unsigned noio_flag = memalloc_noio_save();
+ void *ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
- if (gfp_mask & __GFP_NORETRY)
- noio_flag = memalloc_noio_save();
-
- ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
-
- if (gfp_mask & __GFP_NORETRY)
memalloc_noio_restore(noio_flag);
+ return ptr;
+ }
- return ptr;
+ return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
}
/*
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 7d3e572072f5..3fde9e9faddd 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -22,6 +22,7 @@
#include <linux/time.h>
#include <linux/workqueue.h>
#include <linux/delay.h>
+#include <scsi/scsi_device.h>
#include <scsi/scsi_dh.h>
#include <linux/atomic.h>
#include <linux/blk-mq.h>
@@ -211,25 +212,13 @@ static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
else
m->queue_mode = DM_TYPE_REQUEST_BASED;
- } else if (m->queue_mode == DM_TYPE_BIO_BASED ||
- m->queue_mode == DM_TYPE_NVME_BIO_BASED) {
+ } else if (m->queue_mode == DM_TYPE_BIO_BASED) {
INIT_WORK(&m->process_queued_bios, process_queued_bios);
-
- if (m->queue_mode == DM_TYPE_BIO_BASED) {
- /*
- * bio-based doesn't support any direct scsi_dh management;
- * it just discovers if a scsi_dh is attached.
- */
- set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
- }
- }
-
- if (m->queue_mode != DM_TYPE_NVME_BIO_BASED) {
- set_bit(MPATHF_QUEUE_IO, &m->flags);
- atomic_set(&m->pg_init_in_progress, 0);
- atomic_set(&m->pg_init_count, 0);
- m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
- init_waitqueue_head(&m->pg_init_wait);
+ /*
+ * bio-based doesn't support any direct scsi_dh management;
+ * it just discovers if a scsi_dh is attached.
+ */
+ set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
}
dm_table_set_type(ti->table, m->queue_mode);
@@ -337,14 +326,12 @@ static void __switch_pg(struct multipath *m, struct priority_group *pg)
{
m->current_pg = pg;
- if (m->queue_mode == DM_TYPE_NVME_BIO_BASED)
- return;
-
/* Must we initialise the PG first, and queue I/O till it's ready? */
if (m->hw_handler_name) {
set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
set_bit(MPATHF_QUEUE_IO, &m->flags);
} else {
+ /* FIXME: not needed if no scsi_dh is attached */
clear_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
clear_bit(MPATHF_QUEUE_IO, &m->flags);
}
@@ -385,8 +372,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
unsigned bypassed = 1;
if (!atomic_read(&m->nr_valid_paths)) {
- if (m->queue_mode != DM_TYPE_NVME_BIO_BASED)
- clear_bit(MPATHF_QUEUE_IO, &m->flags);
+ clear_bit(MPATHF_QUEUE_IO, &m->flags);
goto failed;
}
@@ -599,7 +585,7 @@ static struct pgpath *__map_bio(struct multipath *m, struct bio *bio)
return pgpath;
}
-static struct pgpath *__map_bio_nvme(struct multipath *m, struct bio *bio)
+static struct pgpath *__map_bio_fast(struct multipath *m, struct bio *bio)
{
struct pgpath *pgpath;
unsigned long flags;
@@ -634,8 +620,8 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio,
{
struct pgpath *pgpath;
- if (m->queue_mode == DM_TYPE_NVME_BIO_BASED)
- pgpath = __map_bio_nvme(m, bio);
+ if (!m->hw_handler_name)
+ pgpath = __map_bio_fast(m, bio);
else
pgpath = __map_bio(m, bio);
@@ -675,8 +661,7 @@ static void process_queued_io_list(struct multipath *m)
{
if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table));
- else if (m->queue_mode == DM_TYPE_BIO_BASED ||
- m->queue_mode == DM_TYPE_NVME_BIO_BASED)
+ else if (m->queue_mode == DM_TYPE_BIO_BASED)
queue_work(kmultipathd, &m->process_queued_bios);
}
@@ -838,6 +823,16 @@ retain:
*/
kfree(m->hw_handler_name);
m->hw_handler_name = attached_handler_name;
+
+ /*
+ * Init fields that are only used when a scsi_dh is attached
+ */
+ if (!test_and_set_bit(MPATHF_QUEUE_IO, &m->flags)) {
+ atomic_set(&m->pg_init_in_progress, 0);
+ atomic_set(&m->pg_init_count, 0);
+ m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
+ init_waitqueue_head(&m->pg_init_wait);
+ }
}
}
@@ -873,6 +868,7 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
int r;
struct pgpath *p;
struct multipath *m = ti->private;
+ struct scsi_device *sdev;
/* we need at least a path arg */
if (as->argc < 1) {
@@ -891,7 +887,9 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
goto bad;
}
- if (m->queue_mode != DM_TYPE_NVME_BIO_BASED) {
+ sdev = scsi_device_from_queue(bdev_get_queue(p->path.dev->bdev));
+ if (sdev) {
+ put_device(&sdev->sdev_gendev);
INIT_DELAYED_WORK(&p->activate_path, activate_path_work);
r = setup_scsi_dh(p->path.dev->bdev, m, &ti->error);
if (r) {
@@ -1001,8 +999,7 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
if (!hw_argc)
return 0;
- if (m->queue_mode == DM_TYPE_BIO_BASED ||
- m->queue_mode == DM_TYPE_NVME_BIO_BASED) {
+ if (m->queue_mode == DM_TYPE_BIO_BASED) {
dm_consume_args(as, hw_argc);
DMERR("bio-based multipath doesn't allow hardware handler args");
return 0;
@@ -1091,8 +1088,6 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
if (!strcasecmp(queue_mode_name, "bio"))
m->queue_mode = DM_TYPE_BIO_BASED;
- else if (!strcasecmp(queue_mode_name, "nvme"))
- m->queue_mode = DM_TYPE_NVME_BIO_BASED;
else if (!strcasecmp(queue_mode_name, "rq"))
m->queue_mode = DM_TYPE_REQUEST_BASED;
else if (!strcasecmp(queue_mode_name, "mq"))
@@ -1193,7 +1188,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
ti->num_discard_bios = 1;
ti->num_write_same_bios = 1;
ti->num_write_zeroes_bios = 1;
- if (m->queue_mode == DM_TYPE_BIO_BASED || m->queue_mode == DM_TYPE_NVME_BIO_BASED)
+ if (m->queue_mode == DM_TYPE_BIO_BASED)
ti->per_io_data_size = multipath_per_bio_data_size();
else
ti->per_io_data_size = sizeof(struct dm_mpath_io);
@@ -1730,9 +1725,6 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
case DM_TYPE_BIO_BASED:
DMEMIT("queue_mode bio ");
break;
- case DM_TYPE_NVME_BIO_BASED:
- DMEMIT("queue_mode nvme ");
- break;
case DM_TYPE_MQ_REQUEST_BASED:
DMEMIT("queue_mode mq ");
break;
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 7ef469e902c6..c1d1034ff7b7 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3408,9 +3408,10 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
} else {
- if (test_bit(MD_RECOVERY_NEEDED, &recovery) ||
- test_bit(MD_RECOVERY_RESHAPE, &recovery) ||
- test_bit(MD_RECOVERY_RUNNING, &recovery))
+ if (!test_bit(MD_RECOVERY_INTR, &recovery) &&
+ (test_bit(MD_RECOVERY_NEEDED, &recovery) ||
+ test_bit(MD_RECOVERY_RESHAPE, &recovery) ||
+ test_bit(MD_RECOVERY_RUNNING, &recovery)))
r = mddev->curr_resync_completed;
else
r = mddev->recovery_cp;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 5fe7ec356c33..7eb3e2a3c07d 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -942,17 +942,12 @@ static int dm_table_determine_type(struct dm_table *t)
if (t->type != DM_TYPE_NONE) {
/* target already set the table's type */
- if (t->type == DM_TYPE_BIO_BASED)
- return 0;
- else if (t->type == DM_TYPE_NVME_BIO_BASED) {
- if (!dm_table_does_not_support_partial_completion(t)) {
- DMERR("nvme bio-based is only possible with devices"
- " that don't support partial completion");
- return -EINVAL;
- }
- /* Fallthru, also verify all devices are blk-mq */
+ if (t->type == DM_TYPE_BIO_BASED) {
+ /* possibly upgrade to a variant of bio-based */
+ goto verify_bio_based;
}
BUG_ON(t->type == DM_TYPE_DAX_BIO_BASED);
+ BUG_ON(t->type == DM_TYPE_NVME_BIO_BASED);
goto verify_rq_based;
}
@@ -985,6 +980,7 @@ static int dm_table_determine_type(struct dm_table *t)
}
if (bio_based) {
+verify_bio_based:
/* We must use this table as bio-based */
t->type = DM_TYPE_BIO_BASED;
if (dm_table_supports_dax(t) ||
@@ -1755,7 +1751,7 @@ static int device_no_partial_completion(struct dm_target *ti, struct dm_dev *dev
char b[BDEVNAME_SIZE];
/* For now, NVMe devices are the only devices of this class */
- return (strncmp(bdevname(dev->bdev, b), "nvme", 3) == 0);
+ return (strncmp(bdevname(dev->bdev, b), "nvme", 4) == 0);
}
static bool dm_table_does_not_support_partial_completion(struct dm_table *t)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 68136806d365..45328d8b2859 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -458,9 +458,11 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return dm_get_geometry(md, geo);
}
-static int dm_grab_bdev_for_ioctl(struct mapped_device *md,
- struct block_device **bdev,
- fmode_t *mode)
+static char *_dm_claim_ptr = "I belong to device-mapper";
+
+static int dm_get_bdev_for_ioctl(struct mapped_device *md,
+ struct block_device **bdev,
+ fmode_t *mode)
{
struct dm_target *tgt;
struct dm_table *map;
@@ -490,6 +492,10 @@ retry:
goto out;
bdgrab(*bdev);
+ r = blkdev_get(*bdev, *mode, _dm_claim_ptr);
+ if (r < 0)
+ goto out;
+
dm_put_live_table(md, srcu_idx);
return r;
@@ -508,7 +514,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
struct mapped_device *md = bdev->bd_disk->private_data;
int r;
- r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
+ r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
if (r < 0)
return r;
@@ -528,7 +534,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
r = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
out:
- bdput(bdev);
+ blkdev_put(bdev, mode);
return r;
}
@@ -708,14 +714,13 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
static int open_table_device(struct table_device *td, dev_t dev,
struct mapped_device *md)
{
- static char *_claim_ptr = "I belong to device-mapper";
struct block_device *bdev;
int r;
BUG_ON(td->dm_dev.bdev);
- bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _claim_ptr);
+ bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _dm_claim_ptr);
if (IS_ERR(bdev))
return PTR_ERR(bdev);
@@ -3011,7 +3016,7 @@ static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
fmode_t mode;
int r;
- r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
+ r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
if (r < 0)
return r;
@@ -3021,7 +3026,7 @@ static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
else
r = -EOPNOTSUPP;
- bdput(bdev);
+ blkdev_put(bdev, mode);
return r;
}
@@ -3032,7 +3037,7 @@ static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
fmode_t mode;
int r;
- r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
+ r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
if (r < 0)
return r;
@@ -3042,7 +3047,7 @@ static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
else
r = -EOPNOTSUPP;
- bdput(bdev);
+ blkdev_put(bdev, mode);
return r;
}
@@ -3054,7 +3059,7 @@ static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
fmode_t mode;
int r;
- r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
+ r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
if (r < 0)
return r;
@@ -3064,7 +3069,7 @@ static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
else
r = -EOPNOTSUPP;
- bdput(bdev);
+ blkdev_put(bdev, mode);
return r;
}
@@ -3075,7 +3080,7 @@ static int dm_pr_clear(struct block_device *bdev, u64 key)
fmode_t mode;
int r;
- r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
+ r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
if (r < 0)
return r;
@@ -3085,7 +3090,7 @@ static int dm_pr_clear(struct block_device *bdev, u64 key)
else
r = -EOPNOTSUPP;
- bdput(bdev);
+ blkdev_put(bdev, mode);
return r;
}