Diffstat (limited to 'block')
-rw-r--r--  block/blk-cgroup.c        218
-rw-r--r--  block/blk-flush.c          23
-rw-r--r--  block/blk-ioc.c            42
-rw-r--r--  block/blk-lib.c            31
-rw-r--r--  block/blk-mq-sched.c        4
-rw-r--r--  block/blk-mq-tag.h         12
-rw-r--r--  block/blk-mq.c             62
-rw-r--r--  block/blk-rq-qos.c          2
-rw-r--r--  block/blk-timeout.c        22
-rw-r--r--  block/blk.h                19
-rw-r--r--  block/genhd.c              12
-rw-r--r--  block/partitions/core.c     2
12 files changed, 251 insertions, 198 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 594f1d0b0e5a..619a79b51068 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -95,9 +95,6 @@ static void __blkg_release(struct rcu_head *rcu)
css_put(&blkg->blkcg->css);
if (blkg->parent)
blkg_put(blkg->parent);
-
- wb_congested_put(blkg->wb_congested);
-
blkg_free(blkg);
}
@@ -227,7 +224,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
struct blkcg_gq *new_blkg)
{
struct blkcg_gq *blkg;
- struct bdi_writeback_congested *wb_congested;
int i, ret;
WARN_ON_ONCE(!rcu_read_lock_held());
@@ -245,31 +241,22 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
goto err_free_blkg;
}
- wb_congested = wb_congested_get_create(q->backing_dev_info,
- blkcg->css.id,
- GFP_NOWAIT | __GFP_NOWARN);
- if (!wb_congested) {
- ret = -ENOMEM;
- goto err_put_css;
- }
-
/* allocate */
if (!new_blkg) {
new_blkg = blkg_alloc(blkcg, q, GFP_NOWAIT | __GFP_NOWARN);
if (unlikely(!new_blkg)) {
ret = -ENOMEM;
- goto err_put_congested;
+ goto err_put_css;
}
}
blkg = new_blkg;
- blkg->wb_congested = wb_congested;
/* link parent */
if (blkcg_parent(blkcg)) {
blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false);
if (WARN_ON_ONCE(!blkg->parent)) {
ret = -ENODEV;
- goto err_put_congested;
+ goto err_put_css;
}
blkg_get(blkg->parent);
}
@@ -306,8 +293,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
blkg_put(blkg);
return ERR_PTR(ret);
-err_put_congested:
- wb_congested_put(wb_congested);
err_put_css:
css_put(&blkcg->css);
err_free_blkg:
@@ -726,12 +711,137 @@ void blkg_conf_finish(struct blkg_conf_ctx *ctx)
}
EXPORT_SYMBOL_GPL(blkg_conf_finish);
+static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
+{
+ int i;
+
+ for (i = 0; i < BLKG_IOSTAT_NR; i++) {
+ dst->bytes[i] = src->bytes[i];
+ dst->ios[i] = src->ios[i];
+ }
+}
+
+static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src)
+{
+ int i;
+
+ for (i = 0; i < BLKG_IOSTAT_NR; i++) {
+ dst->bytes[i] += src->bytes[i];
+ dst->ios[i] += src->ios[i];
+ }
+}
+
+static void blkg_iostat_sub(struct blkg_iostat *dst, struct blkg_iostat *src)
+{
+ int i;
+
+ for (i = 0; i < BLKG_IOSTAT_NR; i++) {
+ dst->bytes[i] -= src->bytes[i];
+ dst->ios[i] -= src->ios[i];
+ }
+}
+
+static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
+{
+ struct blkcg *blkcg = css_to_blkcg(css);
+ struct blkcg_gq *blkg;
+
+ rcu_read_lock();
+
+ hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
+ struct blkcg_gq *parent = blkg->parent;
+ struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu);
+ struct blkg_iostat cur, delta;
+ unsigned int seq;
+
+ /* fetch the current per-cpu values */
+ do {
+ seq = u64_stats_fetch_begin(&bisc->sync);
+ blkg_iostat_set(&cur, &bisc->cur);
+ } while (u64_stats_fetch_retry(&bisc->sync, seq));
+
+ /* propagate percpu delta to global */
+ u64_stats_update_begin(&blkg->iostat.sync);
+ blkg_iostat_set(&delta, &cur);
+ blkg_iostat_sub(&delta, &bisc->last);
+ blkg_iostat_add(&blkg->iostat.cur, &delta);
+ blkg_iostat_add(&bisc->last, &delta);
+ u64_stats_update_end(&blkg->iostat.sync);
+
+ /* propagate global delta to parent */
+ if (parent) {
+ u64_stats_update_begin(&parent->iostat.sync);
+ blkg_iostat_set(&delta, &blkg->iostat.cur);
+ blkg_iostat_sub(&delta, &blkg->iostat.last);
+ blkg_iostat_add(&parent->iostat.cur, &delta);
+ blkg_iostat_add(&blkg->iostat.last, &delta);
+ u64_stats_update_end(&parent->iostat.sync);
+ }
+ }
+
+ rcu_read_unlock();
+}
+
+/*
+ * The rstat algorithms intentionally don't handle the root cgroup to avoid
+ * incurring overhead when no cgroups are defined. For that reason,
+ * cgroup_rstat_flush in blkcg_print_stat does not actually fill out the
+ * iostat in the root cgroup's blkcg_gq.
+ *
+ * However, we would like to re-use the printing code between the root and
+ * non-root cgroups to the extent possible. For that reason, we simulate
+ * flushing the root cgroup's stats by explicitly filling in the iostat
+ * with disk level statistics.
+ */
+static void blkcg_fill_root_iostats(void)
+{
+ struct class_dev_iter iter;
+ struct device *dev;
+
+ class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
+ while ((dev = class_dev_iter_next(&iter))) {
+ struct gendisk *disk = dev_to_disk(dev);
+ struct hd_struct *part = disk_get_part(disk, 0);
+ struct blkcg_gq *blkg = blk_queue_root_blkg(disk->queue);
+ struct blkg_iostat tmp;
+ int cpu;
+
+ memset(&tmp, 0, sizeof(tmp));
+ for_each_possible_cpu(cpu) {
+ struct disk_stats *cpu_dkstats;
+
+ cpu_dkstats = per_cpu_ptr(part->dkstats, cpu);
+ tmp.ios[BLKG_IOSTAT_READ] +=
+ cpu_dkstats->ios[STAT_READ];
+ tmp.ios[BLKG_IOSTAT_WRITE] +=
+ cpu_dkstats->ios[STAT_WRITE];
+ tmp.ios[BLKG_IOSTAT_DISCARD] +=
+ cpu_dkstats->ios[STAT_DISCARD];
+ // convert sectors to bytes
+ tmp.bytes[BLKG_IOSTAT_READ] +=
+ cpu_dkstats->sectors[STAT_READ] << 9;
+ tmp.bytes[BLKG_IOSTAT_WRITE] +=
+ cpu_dkstats->sectors[STAT_WRITE] << 9;
+ tmp.bytes[BLKG_IOSTAT_DISCARD] +=
+ cpu_dkstats->sectors[STAT_DISCARD] << 9;
+
+ u64_stats_update_begin(&blkg->iostat.sync);
+ blkg_iostat_set(&blkg->iostat.cur, &tmp);
+ u64_stats_update_end(&blkg->iostat.sync);
+ }
+ }
+}
+
static int blkcg_print_stat(struct seq_file *sf, void *v)
{
struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
struct blkcg_gq *blkg;
- cgroup_rstat_flush(blkcg->css.cgroup);
+ if (!seq_css(sf)->parent)
+ blkcg_fill_root_iostats();
+ else
+ cgroup_rstat_flush(blkcg->css.cgroup);
+
rcu_read_lock();
hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
@@ -820,7 +930,6 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
static struct cftype blkcg_files[] = {
{
.name = "stat",
- .flags = CFTYPE_NOT_ON_ROOT,
.seq_show = blkcg_print_stat,
},
{ } /* terminate */
@@ -1101,77 +1210,6 @@ static int blkcg_can_attach(struct cgroup_taskset *tset)
return ret;
}
-static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
-{
- int i;
-
- for (i = 0; i < BLKG_IOSTAT_NR; i++) {
- dst->bytes[i] = src->bytes[i];
- dst->ios[i] = src->ios[i];
- }
-}
-
-static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src)
-{
- int i;
-
- for (i = 0; i < BLKG_IOSTAT_NR; i++) {
- dst->bytes[i] += src->bytes[i];
- dst->ios[i] += src->ios[i];
- }
-}
-
-static void blkg_iostat_sub(struct blkg_iostat *dst, struct blkg_iostat *src)
-{
- int i;
-
- for (i = 0; i < BLKG_IOSTAT_NR; i++) {
- dst->bytes[i] -= src->bytes[i];
- dst->ios[i] -= src->ios[i];
- }
-}
-
-static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
-{
- struct blkcg *blkcg = css_to_blkcg(css);
- struct blkcg_gq *blkg;
-
- rcu_read_lock();
-
- hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
- struct blkcg_gq *parent = blkg->parent;
- struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu);
- struct blkg_iostat cur, delta;
- unsigned seq;
-
- /* fetch the current per-cpu values */
- do {
- seq = u64_stats_fetch_begin(&bisc->sync);
- blkg_iostat_set(&cur, &bisc->cur);
- } while (u64_stats_fetch_retry(&bisc->sync, seq));
-
- /* propagate percpu delta to global */
- u64_stats_update_begin(&blkg->iostat.sync);
- blkg_iostat_set(&delta, &cur);
- blkg_iostat_sub(&delta, &bisc->last);
- blkg_iostat_add(&blkg->iostat.cur, &delta);
- blkg_iostat_add(&bisc->last, &delta);
- u64_stats_update_end(&blkg->iostat.sync);
-
- /* propagate global delta to parent */
- if (parent) {
- u64_stats_update_begin(&parent->iostat.sync);
- blkg_iostat_set(&delta, &blkg->iostat.cur);
- blkg_iostat_sub(&delta, &blkg->iostat.last);
- blkg_iostat_add(&parent->iostat.cur, &delta);
- blkg_iostat_add(&blkg->iostat.last, &delta);
- u64_stats_update_end(&parent->iostat.sync);
- }
- }
-
- rcu_read_unlock();
-}
-
static void blkcg_bind(struct cgroup_subsys_state *root_css)
{
int i;
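The blkcg_rstat_flush() added above walks each blkg, folds the per-cpu delta into the blkg's own counters, and then folds the blkg's delta into its parent. Below is a rough, self-contained userspace sketch of that delta propagation; the struct and function names are invented, a single counter stands in for the bytes[]/ios[] arrays, and the u64_stats seqcount protection is omitted.

#include <stdio.h>

/* hypothetical, simplified stand-in for struct blkg_iostat */
struct iostat {
	unsigned long long cur;
	unsigned long long last;
};

/* hypothetical stand-in for a blkcg_gq with two "CPUs" worth of stats */
struct node {
	struct node *parent;
	struct iostat global;		/* like blkg->iostat */
	struct iostat percpu[2];	/* like blkg->iostat_cpu */
};

/* mirrors the core of blkcg_rstat_flush() for one node and one cpu */
static void flush_one(struct node *n, int cpu)
{
	struct iostat *pc = &n->percpu[cpu];
	unsigned long long delta;

	/* propagate percpu delta to global */
	delta = pc->cur - pc->last;
	n->global.cur += delta;
	pc->last += delta;

	/* propagate global delta to parent */
	if (n->parent) {
		delta = n->global.cur - n->global.last;
		n->parent->global.cur += delta;
		n->global.last += delta;
	}
}

int main(void)
{
	struct node root = { 0 };
	struct node child = { .parent = &root };

	child.percpu[0].cur = 100;	/* CPU0 accounted 100 units of I/O */
	child.percpu[1].cur = 40;	/* CPU1 accounted 40 */
	flush_one(&child, 0);
	flush_one(&child, 1);
	printf("child=%llu root=%llu\n", child.global.cur, root.global.cur);
	/* prints: child=140 root=140 */
	return 0;
}

The last snapshots are what keep repeated flushes cheap: each flush only moves what accumulated since the previous one, which is why the real code tracks both bisc->last and blkg->iostat.last.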
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 15ae0155ec07..6e1543c10493 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -219,7 +219,6 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
struct request *rq, *n;
unsigned long flags = 0;
struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx);
- struct blk_mq_hw_ctx *hctx;
blk_account_io_flush(flush_rq);
@@ -235,13 +234,11 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
if (fq->rq_status != BLK_STS_OK)
error = fq->rq_status;
- hctx = flush_rq->mq_hctx;
if (!q->elevator) {
- blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
- flush_rq->tag = -1;
+ flush_rq->tag = BLK_MQ_NO_TAG;
} else {
blk_mq_put_driver_tag(flush_rq);
- flush_rq->internal_tag = -1;
+ flush_rq->internal_tag = BLK_MQ_NO_TAG;
}
running = &fq->flush_queue[fq->flush_running_idx];
@@ -286,13 +283,8 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
if (fq->flush_pending_idx != fq->flush_running_idx || list_empty(pending))
return;
- /* C2 and C3
- *
- * For blk-mq + scheduling, we can risk having all driver tags
- * assigned to empty flushes, and we deadlock if we are expecting
- * other requests to make progress. Don't defer for that case.
- */
- if (!list_empty(&fq->flush_data_in_flight) && q->elevator &&
+ /* C2 and C3 */
+ if (!list_empty(&fq->flush_data_in_flight) &&
time_before(jiffies,
fq->flush_pending_since + FLUSH_PENDING_TIMEOUT))
return;
@@ -316,13 +308,10 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
flush_rq->mq_ctx = first_rq->mq_ctx;
flush_rq->mq_hctx = first_rq->mq_hctx;
- if (!q->elevator) {
- fq->orig_rq = first_rq;
+ if (!q->elevator)
flush_rq->tag = first_rq->tag;
- blk_mq_tag_set_rq(flush_rq->mq_hctx, first_rq->tag, flush_rq);
- } else {
+ else
flush_rq->internal_tag = first_rq->internal_tag;
- }
flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
flush_rq->cmd_flags |= (flags & REQ_DRV) | (flags & REQ_FAILFAST_MASK);
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 9df50fb507ca..57299f860d41 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -96,15 +96,7 @@ static void ioc_release_fn(struct work_struct *work)
{
struct io_context *ioc = container_of(work, struct io_context,
release_work);
- unsigned long flags;
-
- /*
- * Exiting icq may call into put_io_context() through elevator
- * which will trigger lockdep warning. The ioc's are guaranteed to
- * be different, use a different locking subclass here. Use
- * irqsave variant as there's no spin_lock_irq_nested().
- */
- spin_lock_irqsave_nested(&ioc->lock, flags, 1);
+ spin_lock_irq(&ioc->lock);
while (!hlist_empty(&ioc->icq_list)) {
struct io_cq *icq = hlist_entry(ioc->icq_list.first,
@@ -115,13 +107,27 @@ static void ioc_release_fn(struct work_struct *work)
ioc_destroy_icq(icq);
spin_unlock(&q->queue_lock);
} else {
- spin_unlock_irqrestore(&ioc->lock, flags);
- cpu_relax();
- spin_lock_irqsave_nested(&ioc->lock, flags, 1);
+ /* Make sure q and icq cannot be freed. */
+ rcu_read_lock();
+
+ /* Re-acquire the locks in the correct order. */
+ spin_unlock(&ioc->lock);
+ spin_lock(&q->queue_lock);
+ spin_lock(&ioc->lock);
+
+ /*
+ * The icq may have been destroyed when the ioc lock
+ * was released.
+ */
+ if (!(icq->flags & ICQ_DESTROYED))
+ ioc_destroy_icq(icq);
+
+ spin_unlock(&q->queue_lock);
+ rcu_read_unlock();
}
}
- spin_unlock_irqrestore(&ioc->lock, flags);
+ spin_unlock_irq(&ioc->lock);
kmem_cache_free(iocontext_cachep, ioc);
}
@@ -170,7 +176,6 @@ void put_io_context(struct io_context *ioc)
*/
void put_io_context_active(struct io_context *ioc)
{
- unsigned long flags;
struct io_cq *icq;
if (!atomic_dec_and_test(&ioc->active_ref)) {
@@ -178,19 +183,14 @@ void put_io_context_active(struct io_context *ioc)
return;
}
- /*
- * Need ioc lock to walk icq_list and q lock to exit icq. Perform
- * reverse double locking. Read comment in ioc_release_fn() for
- * explanation on the nested locking annotation.
- */
- spin_lock_irqsave_nested(&ioc->lock, flags, 1);
+ spin_lock_irq(&ioc->lock);
hlist_for_each_entry(icq, &ioc->icq_list, ioc_node) {
if (icq->flags & ICQ_EXITED)
continue;
ioc_exit_icq(icq);
}
- spin_unlock_irqrestore(&ioc->lock, flags);
+ spin_unlock_irq(&ioc->lock);
put_io_context(ioc);
}
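The ioc_release_fn() change above replaces the nested-subclass locking and cpu_relax() retry loop with dropping ioc->lock, taking the locks in queue_lock-then-ioc order, and re-checking ICQ_DESTROYED. Here is a minimal userspace sketch of that drop/reorder/revalidate pattern using pthread mutexes; the names outer, inner and struct item are invented stand-ins for q->queue_lock, ioc->lock and the icq.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct item {
	bool destroyed;
};

static pthread_mutex_t outer = PTHREAD_MUTEX_INITIALIZER;	/* ~ q->queue_lock */
static pthread_mutex_t inner = PTHREAD_MUTEX_INITIALIZER;	/* ~ ioc->lock */

/*
 * Called with inner held and outer not held.  Drop inner, take the locks
 * in outer-then-inner order, and re-check the item's state because it may
 * have changed while no lock was held.  Returns with inner held.
 */
static void destroy_with_lock_reorder(struct item *it)
{
	pthread_mutex_unlock(&inner);
	pthread_mutex_lock(&outer);
	pthread_mutex_lock(&inner);

	if (!it->destroyed)		/* like the ICQ_DESTROYED re-check */
		it->destroyed = true;	/* like ioc_destroy_icq() */

	pthread_mutex_unlock(&outer);
}

int main(void)
{
	struct item it = { .destroyed = false };

	pthread_mutex_lock(&inner);
	destroy_with_lock_reorder(&it);
	pthread_mutex_unlock(&inner);

	printf("destroyed=%d\n", it.destroyed);	/* prints: destroyed=1 */
	return 0;
}

The rcu_read_lock() in the real patch covers the same window: it keeps q and the icq from being freed while neither spinlock is held.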
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 5f2c429d4378..019e09bb9c0e 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -29,7 +29,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
struct request_queue *q = bdev_get_queue(bdev);
struct bio *bio = *biop;
unsigned int op;
- sector_t bs_mask;
+ sector_t bs_mask, part_offset = 0;
if (!q)
return -ENXIO;
@@ -54,9 +54,34 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
if (!nr_sects)
return -EINVAL;
+ /* In case the discard request is in a partition */
+ if (bdev->bd_partno)
+ part_offset = bdev->bd_part->start_sect;
+
while (nr_sects) {
- sector_t req_sects = min_t(sector_t, nr_sects,
- bio_allowed_max_sectors(q));
+ sector_t granularity_aligned_lba, req_sects;
+ sector_t sector_mapped = sector + part_offset;
+
+ granularity_aligned_lba = round_up(sector_mapped,
+ q->limits.discard_granularity >> SECTOR_SHIFT);
+
+ /*
+ * Check whether the discard bio starts at a discard_granularity
+ * aligned LBA:
+ * - If not: set (granularity_aligned_lba - sector_mapped) as the
+ * bi_size of the first split bio, so that the second bio will
+ * start at a discard_granularity aligned LBA on the device.
+ * - If so: use bio_aligned_discard_max_sectors() as the max
+ * possible bi_size of the first split bio. Then when this bio
+ * is split by the device driver, the resulting bios are very
+ * likely to be aligned to the discard_granularity of the
+ * device's queue.
+ */
+ if (granularity_aligned_lba == sector_mapped)
+ req_sects = min_t(sector_t, nr_sects,
+ bio_aligned_discard_max_sectors(q));
+ else
+ req_sects = min_t(sector_t, nr_sects,
+ granularity_aligned_lba - sector_mapped);
WARN_ON_ONCE((req_sects << 9) > UINT_MAX);
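To make the new split-size computation concrete, here is a standalone sketch of the same arithmetic with invented example numbers: a 4KiB discard_granularity and a partition starting at LBA 34. The variable names mirror the patch, but this is illustration only, not the kernel API.

#include <stdio.h>

typedef unsigned long long sector_t;	/* userspace stand-in */

static sector_t round_up_to(sector_t v, sector_t align)
{
	return ((v + align - 1) / align) * align;
}

int main(void)
{
	sector_t granularity = 4096 >> 9;	/* discard_granularity in sectors: 8 */
	sector_t part_offset = 34;		/* partition start, not 4KiB aligned */
	sector_t sector = 0, nr_sects = 1 << 20;
	/* stand-in for bio_aligned_discard_max_sectors() at 4KiB granularity */
	sector_t max_aligned = 0xfffff000u >> 9;

	sector_t sector_mapped = sector + part_offset;
	sector_t granularity_aligned_lba = round_up_to(sector_mapped, granularity);
	sector_t req_sects;

	if (granularity_aligned_lba == sector_mapped)
		/* already aligned: send as much as fits in one aligned bio */
		req_sects = nr_sects < max_aligned ? nr_sects : max_aligned;
	else
		/* not aligned: first bio only reaches the next boundary */
		req_sects = granularity_aligned_lba - sector_mapped;

	printf("first discard bio: %llu sectors starting at device LBA %llu\n",
	       req_sects, sector_mapped);
	/* prints 6 sectors at LBA 34, so the next bio starts at LBA 40,
	 * which is 4KiB aligned on the device */
	return 0;
}

The part_offset term matters because the caller's sector is partition-relative, while the alignment has to hold for the device LBA.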
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 1c52e56a19b1..b8db72cf1043 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -96,7 +96,6 @@ static bool blk_mq_dispatch_hctx_list(struct list_head *rq_list)
struct request *rq;
LIST_HEAD(hctx_list);
unsigned int count = 0;
- bool ret;
list_for_each_entry(rq, rq_list, queuelist) {
if (rq->mq_hctx != hctx) {
@@ -108,8 +107,7 @@ static bool blk_mq_dispatch_hctx_list(struct list_head *rq_list)
list_splice_tail_init(rq_list, &hctx_list);
dispatch:
- ret = blk_mq_dispatch_rq_list(hctx, &hctx_list, count);
- return ret;
+ return blk_mq_dispatch_rq_list(hctx, &hctx_list, count);
}
#define BLK_MQ_BUDGET_DELAY 3 /* ms units */
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 3945c7f5b944..b1acac518c4e 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -101,18 +101,6 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
return atomic_read(&hctx->nr_active) < depth;
}
-/*
- * This helper should only be used for flush request to share tag
- * with the request cloned from, and both the two requests can't be
- * in flight at the same time. The caller has to make sure the tag
- * can't be freed.
- */
-static inline void blk_mq_tag_set_rq(struct blk_mq_hw_ctx *hctx,
- unsigned int tag, struct request *rq)
-{
- hctx->tags->rqs[tag] = rq;
-}
-
static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags,
unsigned int tag)
{
diff --git a/block/blk-mq.c b/block/blk-mq.c
index abcf590f6238..667155f752f7 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -277,26 +277,20 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
{
struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
struct request *rq = tags->static_rqs[tag];
- req_flags_t rq_flags = 0;
if (data->q->elevator) {
rq->tag = BLK_MQ_NO_TAG;
rq->internal_tag = tag;
} else {
- if (data->hctx->flags & BLK_MQ_F_TAG_SHARED) {
- rq_flags = RQF_MQ_INFLIGHT;
- atomic_inc(&data->hctx->nr_active);
- }
rq->tag = tag;
rq->internal_tag = BLK_MQ_NO_TAG;
- data->hctx->tags->rqs[rq->tag] = rq;
}
/* csd/requeue_work/fifo_time is initialized before use */
rq->q = data->q;
rq->mq_ctx = data->ctx;
rq->mq_hctx = data->hctx;
- rq->rq_flags = rq_flags;
+ rq->rq_flags = 0;
rq->cmd_flags = data->cmd_flags;
if (data->flags & BLK_MQ_REQ_PREEMPT)
rq->rq_flags |= RQF_PREEMPT;
@@ -550,8 +544,7 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
blk_stat_add(rq, now);
}
- if (rq->internal_tag != BLK_MQ_NO_TAG)
- blk_mq_sched_completed_request(rq, now);
+ blk_mq_sched_completed_request(rq, now);
blk_account_io_done(rq, now);
@@ -877,10 +870,10 @@ static bool blk_mq_rq_inflight(struct blk_mq_hw_ctx *hctx, struct request *rq,
void *priv, bool reserved)
{
/*
- * If we find a request that is inflight and the queue matches,
+ * If we find a request that isn't idle and the queue matches,
* we know the queue is busy. Return false to stop the iteration.
*/
- if (rq->state == MQ_RQ_IN_FLIGHT && rq->q == hctx->queue) {
+ if (blk_mq_request_started(rq) && rq->q == hctx->queue) {
bool *busy = priv;
*busy = true;
@@ -1105,9 +1098,10 @@ static bool __blk_mq_get_driver_tag(struct request *rq)
{
struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags;
unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags;
- bool shared = blk_mq_tag_busy(rq->mq_hctx);
int tag;
+ blk_mq_tag_busy(rq->mq_hctx);
+
if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) {
bt = &rq->mq_hctx->tags->breserved_tags;
tag_offset = 0;
@@ -1120,19 +1114,23 @@ static bool __blk_mq_get_driver_tag(struct request *rq)
return false;
rq->tag = tag + tag_offset;
- if (shared) {
- rq->rq_flags |= RQF_MQ_INFLIGHT;
- atomic_inc(&rq->mq_hctx->nr_active);
- }
- rq->mq_hctx->tags->rqs[rq->tag] = rq;
return true;
}
static bool blk_mq_get_driver_tag(struct request *rq)
{
- if (rq->tag != BLK_MQ_NO_TAG)
- return true;
- return __blk_mq_get_driver_tag(rq);
+ struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
+
+ if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_get_driver_tag(rq))
+ return false;
+
+ if ((hctx->flags & BLK_MQ_F_TAG_SHARED) &&
+ !(rq->rq_flags & RQF_MQ_INFLIGHT)) {
+ rq->rq_flags |= RQF_MQ_INFLIGHT;
+ atomic_inc(&hctx->nr_active);
+ }
+ hctx->tags->rqs[rq->tag] = rq;
+ return true;
}
static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
@@ -1387,30 +1385,28 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
if (nr_budgets)
nr_budgets--;
ret = q->mq_ops->queue_rq(hctx, &bd);
- if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
- blk_mq_handle_dev_resource(rq, list);
+ switch (ret) {
+ case BLK_STS_OK:
+ queued++;
break;
- } else if (ret == BLK_STS_ZONE_RESOURCE) {
+ case BLK_STS_RESOURCE:
+ case BLK_STS_DEV_RESOURCE:
+ blk_mq_handle_dev_resource(rq, list);
+ goto out;
+ case BLK_STS_ZONE_RESOURCE:
/*
* Move the request to zone_list and keep going through
* the dispatch list to find more requests the drive can
* accept.
*/
blk_mq_handle_zone_resource(rq, &zone_list);
- if (list_empty(list))
- break;
- continue;
- }
-
- if (unlikely(ret != BLK_STS_OK)) {
+ break;
+ default:
errors++;
blk_mq_end_request(rq, BLK_STS_IOERR);
- continue;
}
-
- queued++;
} while (!list_empty(list));
-
+out:
if (!list_empty(&zone_list))
list_splice_tail_init(&zone_list, list);
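The dispatch loop now funnels the queue_rq() result through a single switch rather than the previous if/else chain. The toy mock below only illustrates that control flow; the status values and queue_rq() here are stand-ins, not the real blk_status_t interface.

#include <stdio.h>

/* invented stand-ins for the blk_status_t values handled in the patch */
enum status { STS_OK, STS_RESOURCE, STS_ZONE_RESOURCE, STS_IOERR };

static enum status queue_rq(int rq)
{
	return (rq % 3 == 2) ? STS_RESOURCE : STS_OK;	/* every 3rd rq is starved */
}

int main(void)
{
	int queued = 0, errors = 0, rq;

	for (rq = 0; rq < 10; rq++) {
		switch (queue_rq(rq)) {
		case STS_OK:
			queued++;
			break;
		case STS_RESOURCE:
			/* requeue and stop dispatching, as
			 * blk_mq_handle_dev_resource() + goto out do */
			goto out;
		case STS_ZONE_RESOURCE:
			/* park this request but keep walking the list */
			break;
		default:
			errors++;	/* end the request with an error */
			break;
		}
	}
out:
	printf("queued=%d errors=%d\n", queued, errors);
	/* prints: queued=2 errors=0 */
	return 0;
}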
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 18f3eab9f768..656460636ad3 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -273,6 +273,8 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
if (data.got_token)
break;
if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) {
+ finish_wait(&rqw->wait, &data.wq);
+
/*
* We raced with wbt_wake_function() getting a token,
* which means we now have two. Put our local token
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 3a1ac6434758..4c1fc3417460 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -88,11 +88,29 @@ void blk_abort_request(struct request *req)
}
EXPORT_SYMBOL_GPL(blk_abort_request);
+static unsigned long blk_timeout_mask __read_mostly;
+
+static int __init blk_timeout_init(void)
+{
+ blk_timeout_mask = roundup_pow_of_two(HZ) - 1;
+ return 0;
+}
+
+late_initcall(blk_timeout_init);
+
+/*
+ * Just a rough estimate, we don't care about specific values for timeouts.
+ */
+static inline unsigned long blk_round_jiffies(unsigned long j)
+{
+ return (j + blk_timeout_mask) + 1;
+}
+
unsigned long blk_rq_timeout(unsigned long timeout)
{
unsigned long maxt;
- maxt = round_jiffies_up(jiffies + BLK_MAX_TIMEOUT);
+ maxt = blk_round_jiffies(jiffies + BLK_MAX_TIMEOUT);
if (time_after(timeout, maxt))
timeout = maxt;
@@ -129,7 +147,7 @@ void blk_add_timer(struct request *req)
* than an existing one, modify the timer. Round up to next nearest
* second.
*/
- expiry = blk_rq_timeout(round_jiffies_up(expiry));
+ expiry = blk_rq_timeout(blk_round_jiffies(expiry));
if (!timer_pending(&q->timeout) ||
time_before(expiry, q->timeout.expires)) {
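blk_round_jiffies() above replaces the exact per-second rounding of round_jiffies_up() with simply adding roughly one second via a power-of-two mask computed once at boot. A quick userspace check of the arithmetic (the HZ value is made up):

#include <stdio.h>

#define HZ 250UL	/* example tick rate, not taken from the patch */

static unsigned long roundup_pow_of_two(unsigned long v)
{
	unsigned long r = 1;

	while (r < v)
		r <<= 1;
	return r;
}

int main(void)
{
	unsigned long blk_timeout_mask = roundup_pow_of_two(HZ) - 1;	/* 255 */
	unsigned long j = 123456;

	/* same expression as blk_round_jiffies() */
	unsigned long rounded = (j + blk_timeout_mask) + 1;

	printf("j=%lu rounded=%lu (+%lu jiffies, ~%.2fs)\n",
	       j, rounded, rounded - j, (double)(rounded - j) / HZ);
	/* prints: j=123456 rounded=123712 (+256 jiffies, ~1.02s) */
	return 0;
}

The result is no longer tied to a second boundary, which is close enough for request timeouts and avoids the per-call rounding math in round_jiffies_up().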
diff --git a/block/blk.h b/block/blk.h
index 94f7c084f68f..49e2928a1632 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -25,11 +25,6 @@ struct blk_flush_queue {
struct list_head flush_data_in_flight;
struct request *flush_rq;
- /*
- * flush_rq shares tag with this rq, both can't be active
- * at the same time
- */
- struct request *orig_rq;
struct lock_class_key key;
spinlock_t mq_flush_lock;
};
@@ -270,6 +265,20 @@ static inline unsigned int bio_allowed_max_sectors(struct request_queue *q)
}
/*
+ * The max bio size which is aligned to q->limits.discard_granularity. This
+ * is a hint for splitting large discard bios in the generic block layer, so
+ * that if the device driver needs to split the discard bio into smaller
+ * ones, their bi_size can easily be aligned to the discard_granularity of
+ * the device's queue.
+ */
+static inline unsigned int bio_aligned_discard_max_sectors(
+ struct request_queue *q)
+{
+ return round_down(UINT_MAX, q->limits.discard_granularity) >>
+ SECTOR_SHIFT;
+}
+
+/*
* Internal io_context interface
*/
void get_io_context(struct io_context *ioc);
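For reference, the new bio_aligned_discard_max_sectors() helper just rounds UINT_MAX down to a multiple of the discard granularity and converts bytes to 512-byte sectors; a tiny standalone check of that arithmetic with an invented 64KiB granularity:

#include <limits.h>
#include <stdio.h>

int main(void)
{
	unsigned int granularity = 64 * 1024;	/* example discard_granularity */
	unsigned int max_bytes = (UINT_MAX / granularity) * granularity;
	unsigned int max_sectors = max_bytes >> 9;	/* SECTOR_SHIFT */

	printf("max aligned discard bio: %u bytes = %u sectors\n",
	       max_bytes, max_sectors);
	/* prints: max aligned discard bio: 4294901760 bytes = 8388480 sectors */
	return 0;
}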
diff --git a/block/genhd.c b/block/genhd.c
index 60ae4e1b4d38..8b1e9f48957c 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -38,8 +38,6 @@ static struct kobject *block_depr;
static DEFINE_SPINLOCK(ext_devt_lock);
static DEFINE_IDR(ext_devt_idr);
-static const struct device_type disk_type;
-
static void disk_check_events(struct disk_events *ev,
unsigned int *clearing_ptr);
static void disk_alloc_events(struct gendisk *disk);
@@ -1587,7 +1585,7 @@ static char *block_devnode(struct device *dev, umode_t *mode,
return NULL;
}
-static const struct device_type disk_type = {
+const struct device_type disk_type = {
.name = "disk",
.groups = disk_attr_groups,
.release = disk_release,
@@ -2056,18 +2054,12 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask)
*/
unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
{
- const struct block_device_operations *bdops = disk->fops;
struct disk_events *ev = disk->ev;
unsigned int pending;
unsigned int clearing = mask;
- if (!ev) {
- /* for drivers still using the old ->media_changed method */
- if ((mask & DISK_EVENT_MEDIA_CHANGE) &&
- bdops->media_changed && bdops->media_changed(disk))
- return DISK_EVENT_MEDIA_CHANGE;
+ if (!ev)
return 0;
- }
disk_block_events(disk);
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 78951e33b2d7..e62a98a8eeb7 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -619,8 +619,6 @@ int blk_drop_partitions(struct block_device *bdev)
struct disk_part_iter piter;
struct hd_struct *part;
- if (!disk_part_scan_enabled(bdev->bd_disk))
- return 0;
if (bdev->bd_part_count)
return -EBUSY;