diff options
author | Stefan Haberland <sth@linux.vnet.ibm.com> | 2017-05-16 10:30:13 +0200 |
---|---|---|
committer | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2017-06-12 16:26:01 +0200 |
commit | 2757fe1d8ebd0e6ab1dbf1105978b8c8369dcc49 (patch) | |
tree | 581299b3b20bcea725808b7a30ed0138df0fb25d | |
parent | b487a914f853545842a0899329b6b72fe56c4081 (diff) | |
download | lwn-2757fe1d8ebd0e6ab1dbf1105978b8c8369dcc49.tar.gz lwn-2757fe1d8ebd0e6ab1dbf1105978b8c8369dcc49.zip |
s390/dasd: fix unusable device after safe offline processing
Safe offline processing, like normal offline processing, needs to be
locked against multiple parallel executions. However, it must still be
possible for a normal offline to overtake a running safe offline, so that
the user can keep the device from waiting forever for outstanding I/O if
desired.
Unfortunately the parallel processing of safe offline and normal offline
might lead to a race situation where both threads report successful
execution to the CIO layer which in turn tries to deregister the kobject
of the device twice. This leads to the error
refcount_t: underflow; use-after-free.
and the device cannot be set online again afterwards without
a reboot.
Correct the locking of the safe offline processing by doing the following:
- Use the cdev lock to secure all set and test operations on the
device flags.
- Two safe offline processes are locked against each other using
the DASD_FLAG_SAFE_OFFLINE and DASD_FLAG_SAFE_OFFLINE_RUNNING
device flags.
The differentiation between offline triggered and offline running
is needed since the normal offline attribute is owned by CIO and
we have to pass over control in between.
- The dasd_generic_set_offline process handles the offline
processing. It is locked against parallel execution using the
DASD_FLAG_OFFLINE.
- Only a running safe offline should be able to be overtaken by a
single normal offline. This is ensured by clearing the
DASD_FLAG_SAFE_OFFLINE_RUNNING flag when a normal offline
overtakes, so this can only happen once.
- In this case the safe offline simply aborts without doing anything,
and the normal offline processing finishes as usual.
Signed-off-by: Stefan Haberland <sth@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
-rw-r--r-- | drivers/s390/block/dasd.c | 68 | ||||
-rw-r--r-- | drivers/s390/block/dasd_devmap.c | 7 |
2 files changed, 44 insertions, 31 deletions
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 6fb3fd5efc11..b0c65dcb6865 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -3562,57 +3562,69 @@ int dasd_generic_set_offline(struct ccw_device *cdev) else pr_warn("%s: The DASD cannot be set offline while it is in use\n", dev_name(&cdev->dev)); - clear_bit(DASD_FLAG_OFFLINE, &device->flags); - goto out_busy; + rc = -EBUSY; + goto out_err; } } - if (test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) { - /* - * safe offline already running - * could only be called by normal offline so safe_offline flag - * needs to be removed to run normal offline and kill all I/O - */ - if (test_and_set_bit(DASD_FLAG_OFFLINE, &device->flags)) - /* Already doing normal offline processing */ - goto out_busy; - else - clear_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags); - } else { - if (test_bit(DASD_FLAG_OFFLINE, &device->flags)) - /* Already doing offline processing */ - goto out_busy; + /* + * Test if the offline processing is already running and exit if so. 
+ * If a safe offline is being processed this could only be a normal + * offline that should be able to overtake the safe offline and + * cancel any I/O we do not want to wait for any longer + */ + if (test_bit(DASD_FLAG_OFFLINE, &device->flags)) { + if (test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) { + clear_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, + &device->flags); + } else { + rc = -EBUSY; + goto out_err; + } } - set_bit(DASD_FLAG_OFFLINE, &device->flags); - spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); /* - * if safe_offline called set safe_offline_running flag and + * if safe_offline is called set safe_offline_running flag and * clear safe_offline so that a call to normal offline * can overrun safe_offline processing */ if (test_and_clear_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags) && !test_and_set_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) { + /* need to unlock here to wait for outstanding I/O */ + spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); /* * If we want to set the device safe offline all IO operations * should be finished before continuing the offline process * so sync bdev first and then wait for our queues to become * empty */ - /* sync blockdev and partitions */ if (device->block) { rc = fsync_bdev(device->block->bdev); if (rc != 0) goto interrupted; } - /* schedule device tasklet and wait for completion */ dasd_schedule_device_bh(device); rc = wait_event_interruptible(shutdown_waitq, _wait_for_empty_queues(device)); if (rc != 0) goto interrupted; + + /* + * check if a normal offline process overtook the offline + * processing in this case simply do nothing beside returning + * that we got interrupted + * otherwise mark safe offline as not running any longer and + * continue with normal offline + */ + spin_lock_irqsave(get_ccwdev_lock(cdev), flags); + if (!test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) { + rc = -ERESTARTSYS; + goto out_err; + } + clear_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags); } 
+ spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); dasd_set_target_state(device, DASD_STATE_NEW); /* dasd_delete_device destroys the device reference. */ @@ -3624,22 +3636,18 @@ int dasd_generic_set_offline(struct ccw_device *cdev) */ if (block) dasd_free_block(block); + return 0; interrupted: /* interrupted by signal */ - clear_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags); + spin_lock_irqsave(get_ccwdev_lock(cdev), flags); clear_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags); clear_bit(DASD_FLAG_OFFLINE, &device->flags); - dasd_put_device(device); - - return rc; - -out_busy: +out_err: dasd_put_device(device); spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); - - return -EBUSY; + return rc; } EXPORT_SYMBOL_GPL(dasd_generic_set_offline); diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index 0ce84f0a4d7f..e943d9c48926 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -950,11 +950,14 @@ dasd_safe_offline_store(struct device *dev, struct device_attribute *attr, { struct ccw_device *cdev = to_ccwdev(dev); struct dasd_device *device; + unsigned long flags; int rc; - device = dasd_device_from_cdev(cdev); + spin_lock_irqsave(get_ccwdev_lock(cdev), flags); + device = dasd_device_from_cdev_locked(cdev); if (IS_ERR(device)) { rc = PTR_ERR(device); + spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); goto out; } @@ -962,12 +965,14 @@ dasd_safe_offline_store(struct device *dev, struct device_attribute *attr, test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) { /* Already doing offline processing */ dasd_put_device(device); + spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); rc = -EBUSY; goto out; } set_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags); dasd_put_device(device); + spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); rc = ccw_device_set_offline(cdev); |