summaryrefslogtreecommitdiff
path: root/block/blk-core.c
diff options
context:
space:
mode:
authorBart Van Assche <bart.vanassche@wdc.com>2017-11-09 10:49:58 -0800
committerJens Axboe <axboe@kernel.dk>2017-11-10 19:53:25 -0700
commit3a0a529971ec4e2d933e9c7798db101dfb6b1aec (patch)
treee79cac20198e657afc109f6f80111b0fb03f9dbc /block/blk-core.c
parentc9254f2ddb19387ea9714a57ea48463c20333b92 (diff)
downloadlwn-3a0a529971ec4e2d933e9c7798db101dfb6b1aec.tar.gz
lwn-3a0a529971ec4e2d933e9c7798db101dfb6b1aec.zip
block, scsi: Make SCSI quiesce and resume work reliably
The contexts from which a SCSI device can be quiesced or resumed are: * Writing into /sys/class/scsi_device/*/device/state. * SCSI parallel (SPI) domain validation. * The SCSI device power management methods. See also scsi_bus_pm_ops. It is essential during suspend and resume that neither the filesystem state nor the filesystem metadata in RAM changes. This is why while the hibernation image is being written or restored that SCSI devices are quiesced. The SCSI core quiesces devices through scsi_device_quiesce() and scsi_device_resume(). In the SDEV_QUIESCE state execution of non-preempt requests is deferred. This is realized by returning BLKPREP_DEFER from inside scsi_prep_state_check() for quiesced SCSI devices. Avoid that a full queue prevents power management requests to be submitted by deferring allocation of non-preempt requests for devices in the quiesced state. This patch has been tested by running the following commands and by verifying that after each resume the fio job was still running: for ((i=0; i<10; i++)); do ( cd /sys/block/md0/md && while true; do [ "$(<sync_action)" = "idle" ] && echo check > sync_action sleep 1 done ) & pids=($!) for d in /sys/class/block/sd*[a-z]; do bdev=${d#/sys/class/block/} hcil=$(readlink "$d/device") hcil=${hcil#../../../} echo 4 > "$d/queue/nr_requests" echo 1 > "/sys/class/scsi_device/$hcil/device/queue_depth" fio --name="$bdev" --filename="/dev/$bdev" --buffered=0 --bs=512 \ --rw=randread --ioengine=libaio --numjobs=4 --iodepth=16 \ --iodepth_batch=1 --thread --loops=$((2**31)) & pids+=($!) done sleep 1 echo "$(date) Hibernating ..." >>hibernate-test-log.txt systemctl hibernate sleep 10 kill "${pids[@]}" echo idle > /sys/block/md0/md/sync_action wait echo "$(date) Done." >>hibernate-test-log.txt done Reported-by: Oleksandr Natalenko <oleksandr@natalenko.name> References: "I/O hangs after resuming from suspend-to-ram" (https://marc.info/?l=linux-block&m=150340235201348). Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com> Reviewed-by: Hannes Reinecke <hare@suse.com> Tested-by: Martin Steigerwald <martin@lichtvoll.de> Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name> Cc: Martin K. Petersen <martin.petersen@oracle.com> Cc: Ming Lei <ming.lei@redhat.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Johannes Thumshirn <jthumshirn@suse.de> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block/blk-core.c')
-rw-r--r--block/blk-core.c42
1 files changed, 34 insertions, 8 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index edc276899116..29b08428ae45 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -374,6 +374,7 @@ void blk_clear_preempt_only(struct request_queue *q)
spin_lock_irqsave(q->queue_lock, flags);
queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q);
+ wake_up_all(&q->mq_freeze_wq);
spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL_GPL(blk_clear_preempt_only);
@@ -795,15 +796,38 @@ struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
}
EXPORT_SYMBOL(blk_alloc_queue);
-int blk_queue_enter(struct request_queue *q, bool nowait)
+/**
+ * blk_queue_enter() - try to increase q->q_usage_counter
+ * @q: request queue pointer
+ * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PREEMPT
+ */
+int blk_queue_enter(struct request_queue *q, unsigned int flags)
{
+ const bool preempt = flags & BLK_MQ_REQ_PREEMPT;
+
while (true) {
+ bool success = false;
int ret;
- if (percpu_ref_tryget_live(&q->q_usage_counter))
+ rcu_read_lock_sched();
+ if (percpu_ref_tryget_live(&q->q_usage_counter)) {
+ /*
+ * The code that sets the PREEMPT_ONLY flag is
+ * responsible for ensuring that that flag is globally
+ * visible before the queue is unfrozen.
+ */
+ if (preempt || !blk_queue_preempt_only(q)) {
+ success = true;
+ } else {
+ percpu_ref_put(&q->q_usage_counter);
+ }
+ }
+ rcu_read_unlock_sched();
+
+ if (success)
return 0;
- if (nowait)
+ if (flags & BLK_MQ_REQ_NOWAIT)
return -EBUSY;
/*
@@ -816,7 +840,8 @@ int blk_queue_enter(struct request_queue *q, bool nowait)
smp_rmb();
ret = wait_event_interruptible(q->mq_freeze_wq,
- !atomic_read(&q->mq_freeze_depth) ||
+ (atomic_read(&q->mq_freeze_depth) == 0 &&
+ (preempt || !blk_queue_preempt_only(q))) ||
blk_queue_dying(q));
if (blk_queue_dying(q))
return -ENODEV;
@@ -1445,8 +1470,7 @@ static struct request *blk_old_get_request(struct request_queue *q,
/* create ioc upfront */
create_io_context(gfp_mask, q->node);
- ret = blk_queue_enter(q, !(gfp_mask & __GFP_DIRECT_RECLAIM) ||
- (op & REQ_NOWAIT));
+ ret = blk_queue_enter(q, flags);
if (ret)
return ERR_PTR(ret);
spin_lock_irq(q->queue_lock);
@@ -2267,8 +2291,10 @@ blk_qc_t generic_make_request(struct bio *bio)
current->bio_list = bio_list_on_stack;
do {
struct request_queue *q = bio->bi_disk->queue;
+ unsigned int flags = bio->bi_opf & REQ_NOWAIT ?
+ BLK_MQ_REQ_NOWAIT : 0;
- if (likely(blk_queue_enter(q, bio->bi_opf & REQ_NOWAIT) == 0)) {
+ if (likely(blk_queue_enter(q, flags) == 0)) {
struct bio_list lower, same;
/* Create a fresh bio_list for all subordinate requests */
@@ -2327,7 +2353,7 @@ blk_qc_t direct_make_request(struct bio *bio)
if (!generic_make_request_checks(bio))
return BLK_QC_T_NONE;
- if (unlikely(blk_queue_enter(q, nowait))) {
+ if (unlikely(blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0))) {
if (nowait && !blk_queue_dying(q))
bio->bi_status = BLK_STS_AGAIN;
else