Diffstat (limited to 'block')
 block/blk-cgroup.c   | 41
 block/blk-core.c     |  2
 block/blk-map.c      |  2
 block/blk-mq-tag.c   | 12
 block/blk-mq.c       |  8
 block/blk-settings.c |  3
 block/blk-wbt.c      | 12
 block/fops.c         | 35
 8 files changed, 73 insertions(+), 42 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index c8b28ec5dde9..fc49be622e05 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -34,6 +34,8 @@
#include "blk-ioprio.h"
#include "blk-throttle.h"
+static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu);
+
/*
* blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation.
* blkcg_pol_register_mutex nests outside of it and synchronizes entire
@@ -56,6 +58,8 @@ static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */
bool blkcg_debug_stats = false;
+static DEFINE_RAW_SPINLOCK(blkg_stat_lock);
+
#define BLKG_DESTROY_BATCH_SIZE 64
/*
@@ -163,10 +167,20 @@ static void blkg_free(struct blkcg_gq *blkg)
static void __blkg_release(struct rcu_head *rcu)
{
struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
+ struct blkcg *blkcg = blkg->blkcg;
+ int cpu;
#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
WARN_ON(!bio_list_empty(&blkg->async_bios));
#endif
+ /*
+ * Flush all the non-empty percpu lockless lists before releasing
+ * us, given these stats belong to us.
+ *
+ * blkg_stat_lock is for serializing blkg stat updates.
+ */
+ for_each_possible_cpu(cpu)
+ __blkcg_rstat_flush(blkcg, cpu);
/* release the blkcg and parent blkg refs this blkg has been holding */
css_put(&blkg->blkcg->css);
@@ -965,16 +979,12 @@ static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur,
u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
}
-static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
+static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu)
{
- struct blkcg *blkcg = css_to_blkcg(css);
struct llist_head *lhead = per_cpu_ptr(blkcg->lhead, cpu);
struct llist_node *lnode;
struct blkg_iostat_set *bisc, *next_bisc;
-
- /* Root-level stats are sourced from system-wide IO stats */
- if (!cgroup_parent(css->cgroup))
- return;
+ unsigned long flags;
rcu_read_lock();
@@ -983,6 +993,14 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
goto out;
/*
+ * Guard against a concurrent parent blkg update from blkg_release().
+ *
+ * When flushing from cgroup, cgroup_rstat_lock is always held, so
+ * this lock won't cause contention most of the time.
+ */
+ raw_spin_lock_irqsave(&blkg_stat_lock, flags);
+
+ /*
* Iterate only the iostat_cpu entries queued in the lockless list.
*/
llist_for_each_entry_safe(bisc, next_bisc, lnode, lnode) {
@@ -1005,13 +1023,19 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
if (parent && parent->parent)
blkcg_iostat_update(parent, &blkg->iostat.cur,
&blkg->iostat.last);
- percpu_ref_put(&blkg->refcnt);
}
-
+ raw_spin_unlock_irqrestore(&blkg_stat_lock, flags);
out:
rcu_read_unlock();
}
+static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
+{
+ /* Root-level stats are sourced from system-wide IO stats */
+ if (cgroup_parent(css->cgroup))
+ __blkcg_rstat_flush(css_to_blkcg(css), cpu);
+}
+
/*
* We source root cgroup stats from the system-wide stats to avoid
* tracking the same information twice and incurring overhead when no
@@ -2092,7 +2116,6 @@ void blk_cgroup_bio_start(struct bio *bio)
llist_add(&bis->lnode, lhead);
WRITE_ONCE(bis->lqueued, true);
- percpu_ref_get(&bis->blkg->refcnt);
}
u64_stats_update_end_irqrestore(&bis->sync, flags);
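Note: the blk-cgroup.c change moves the per-cpu stat flush into __blkg_release() and serializes it with the new blkg_stat_lock, replacing the per-bisc percpu_ref get/put. Below is a minimal userspace sketch of that drain pattern: producers push onto a lock-free singly linked list, and the flusher atomically detaches the whole list before folding it into shared totals under a lock. All names are illustrative stand-ins, not the kernel's llist or u64_stats API.

#include <pthread.h>
#include <stdatomic.h>

struct stat_node {
	struct stat_node *next;
	long delta;
};

static _Atomic(struct stat_node *) pending;	/* one list per CPU in the kernel */
static pthread_mutex_t stat_lock = PTHREAD_MUTEX_INITIALIZER;	/* blkg_stat_lock analogue */
static long total;

/* Producer side: analogous to llist_add() in blk_cgroup_bio_start(). */
void stat_queue(struct stat_node *n)
{
	n->next = atomic_load(&pending);
	while (!atomic_compare_exchange_weak(&pending, &n->next, n))
		;
}

/*
 * Flush side: analogous to llist_del_all() plus the
 * llist_for_each_entry_safe() walk in __blkcg_rstat_flush().
 */
void stat_flush(void)
{
	struct stat_node *n = atomic_exchange(&pending, NULL);

	pthread_mutex_lock(&stat_lock);
	for (; n; n = n->next)
		total += n->delta;
	pthread_mutex_unlock(&stat_lock);
}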
diff --git a/block/blk-core.c b/block/blk-core.c
index 2ae22bebeb3e..3fc68b944479 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -521,7 +521,7 @@ static inline int bio_check_eod(struct bio *bio)
sector_t maxsector = bdev_nr_sectors(bio->bi_bdev);
unsigned int nr_sectors = bio_sectors(bio);
- if (nr_sectors && maxsector &&
+ if (nr_sectors &&
(nr_sectors > maxsector ||
bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
pr_info_ratelimited("%s: attempt to access beyond end of device\n"
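Note: the bio_check_eod() change drops the maxsector != 0 test so that a zero-capacity device also rejects non-empty I/O. The ordering of the comparison keeps the arithmetic safe: nr_sectors > maxsector is evaluated first, so maxsector - nr_sectors can never underflow. A standalone sketch of the resulting predicate, with plain integer types standing in for sector_t and struct bio:

#include <stdbool.h>
#include <stdint.h>

static bool beyond_eod(uint64_t start, uint32_t nr_sectors, uint64_t maxsector)
{
	return nr_sectors &&
	       (nr_sectors > maxsector || start > maxsector - nr_sectors);
}

/*
 * beyond_eod(0, 1, 0)    -> true  (zero-sized device rejects any non-empty bio)
 * beyond_eod(0, 0, 0)    -> false (an empty bio is always in range)
 * beyond_eod(99, 8, 100) -> true  (would run past sector 100)
 * beyond_eod(92, 8, 100) -> false (ends exactly at the last sector)
 */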
diff --git a/block/blk-map.c b/block/blk-map.c
index 3551c3ff17cf..44d74a30ddac 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -248,7 +248,7 @@ static struct bio *blk_rq_map_bio_alloc(struct request *rq,
{
struct bio *bio;
- if (rq->cmd_flags & REQ_ALLOC_CACHE) {
+ if (rq->cmd_flags & REQ_ALLOC_CACHE && (nr_vecs <= BIO_INLINE_VECS)) {
bio = bio_alloc_bioset(NULL, nr_vecs, rq->cmd_flags, gfp_mask,
&fs_bio_set);
if (!bio)
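Note: the blk-map.c hunk gates the bio-cache fast path on nr_vecs <= BIO_INLINE_VECS, since the per-cpu bio cache only holds bios with inline bio_vecs; larger mappings fall through to the allocation path below. (The expression parses as intended because & binds tighter than &&.) A tiny sketch of the eligibility test, with assumed stand-in values:

#include <stdbool.h>

#define ALLOC_CACHE_BIT (1u << 0)	/* illustrative flag bit, not the kernel's REQ_ALLOC_CACHE */
#define INLINE_VECS 4			/* stands in for BIO_INLINE_VECS; value assumed */

/* Mirrors the condition above: cached bios only carry inline vectors. */
static bool can_use_bio_cache(unsigned int cmd_flags, unsigned int nr_vecs)
{
	return (cmd_flags & ALLOC_CACHE_BIT) && nr_vecs <= INLINE_VECS;
}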
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 426197312069..cc57e2dd9a0b 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -40,16 +40,20 @@ void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
unsigned int users;
struct blk_mq_tags *tags = hctx->tags;
+ /*
+ * Calling test_bit() prior to test_and_set_bit() is intentional;
+ * it avoids dirtying the cacheline if the queue is already active.
+ */
if (blk_mq_is_shared_tags(hctx->flags)) {
struct request_queue *q = hctx->queue;
- if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
+ if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) ||
+ test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
return;
- set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags);
} else {
- if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+ if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) ||
+ test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
return;
- set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state);
}
spin_lock_irq(&tags->lock);
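Note: the tag-busy path now does a plain test_bit() before the test_and_set_bit(), so callers that find the flag already set return after a read-only access and never dirty the cacheline. A userspace sketch of the same test-then-test-and-set idiom using C11 atomics (names illustrative, not the kernel bitops API):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool active;

/* Returns true for the one caller that performed the 0 -> 1 transition. */
static bool mark_active(void)
{
	/* Read-only fast path: no cacheline dirtying when already set. */
	if (atomic_load_explicit(&active, memory_order_relaxed))
		return false;

	/* Slow path: the atomic RMW happens at most once per transition. */
	return !atomic_exchange(&active, true);
}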
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 32e50bc0cbb0..98eb31ff914d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -688,6 +688,10 @@ static void __blk_mq_free_request(struct request *rq)
blk_crypto_free_request(rq);
blk_pm_mark_last_busy(rq);
rq->mq_hctx = NULL;
+
+ if (rq->rq_flags & RQF_MQ_INFLIGHT)
+ __blk_mq_dec_active_requests(hctx);
+
if (rq->tag != BLK_MQ_NO_TAG)
blk_mq_put_tag(hctx->tags, ctx, rq->tag);
if (sched_tag != BLK_MQ_NO_TAG)
@@ -699,15 +703,11 @@ static void __blk_mq_free_request(struct request *rq)
void blk_mq_free_request(struct request *rq)
{
struct request_queue *q = rq->q;
- struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
if ((rq->rq_flags & RQF_USE_SCHED) &&
q->elevator->type->ops.finish_request)
q->elevator->type->ops.finish_request(rq);
- if (rq->rq_flags & RQF_MQ_INFLIGHT)
- __blk_mq_dec_active_requests(hctx);
-
if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
laptop_io_completion(q->disk->bdi);
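Note: the blk-mq.c change moves the RQF_MQ_INFLIGHT decrement from blk_mq_free_request() into __blk_mq_free_request(), so the active-request count drops before the tag is returned to the pool, and a concurrent allocator that wins the freed tag never observes a count that still includes this request. A hedged sketch of that ordering, with a toy tag pool in place of the kernel structures:

#include <stdatomic.h>
#include <stdbool.h>

struct tag_pool {
	atomic_uint active;	/* requests counted against fair tag sharing */
	atomic_ulong free_mask;	/* one bit per free tag (illustrative) */
};

static void put_tag(struct tag_pool *p, unsigned int tag)
{
	atomic_fetch_or(&p->free_mask, 1UL << tag);
}

static void free_request(struct tag_pool *p, unsigned int tag, bool inflight)
{
	if (inflight)
		atomic_fetch_sub(&p->active, 1);	/* account first ... */
	put_tag(p, tag);				/* ... then recycle the tag */
}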
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 896b4654ab00..4dd59059b788 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -915,6 +915,7 @@ static bool disk_has_partitions(struct gendisk *disk)
void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
{
struct request_queue *q = disk->queue;
+ unsigned int old_model = q->limits.zoned;
switch (model) {
case BLK_ZONED_HM:
@@ -952,7 +953,7 @@ void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
*/
blk_queue_zone_write_granularity(q,
queue_logical_block_size(q));
- } else {
+ } else if (old_model != BLK_ZONED_NONE) {
disk_clear_zone_settings(disk);
}
}
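Note: disk_set_zoned() now records the previous zoned model and only calls disk_clear_zone_settings() when actually leaving a zoned model, avoiding the clear on disks that were never zoned. A small sketch of the transition guard (enum values are stand-ins for enum blk_zoned_model):

enum zoned_model { ZONED_NONE, ZONED_HA, ZONED_HM };

static int clear_count;	/* counts clear operations, for illustration */

static void set_zoned(enum zoned_model *cur, enum zoned_model model)
{
	enum zoned_model old = *cur;

	*cur = model;
	/* Only clear when actually leaving a zoned model. */
	if (model == ZONED_NONE && old != ZONED_NONE)
		clear_count++;	/* disk_clear_zone_settings() analogue */
}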
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 9f7c99c025f3..0bb613139bec 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -713,14 +713,16 @@ void wbt_enable_default(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
struct rq_qos *rqos;
- bool disable_flag = q->elevator &&
- test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags);
+ bool enable = IS_ENABLED(CONFIG_BLK_WBT_MQ);
+
+ if (q->elevator &&
+ test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags))
+ enable = false;
/* Throttling already enabled? */
rqos = wbt_rq_qos(q);
if (rqos) {
- if (!disable_flag &&
- RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
+ if (enable && RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT;
return;
}
@@ -729,7 +731,7 @@ void wbt_enable_default(struct gendisk *disk)
if (!blk_queue_registered(q))
return;
- if (queue_is_mq(q) && !disable_flag)
+ if (queue_is_mq(q) && enable)
wbt_init(disk);
}
EXPORT_SYMBOL_GPL(wbt_enable_default);
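Note: wbt_enable_default() now starts from the build-time default, IS_ENABLED(CONFIG_BLK_WBT_MQ), and lets the elevator's ELEVATOR_FLAG_DISABLE_WBT only veto it, instead of tracking the veto flag alone. A sketch of the decision logic (the macro value is an assumed stand-in):

#include <stdbool.h>

#define WBT_DEFAULT_ON true	/* stands in for IS_ENABLED(CONFIG_BLK_WBT_MQ) */

/* Start from the build-time default; the elevator can only veto. */
static bool wbt_should_enable(bool has_elevator, bool elevator_disables_wbt)
{
	bool enable = WBT_DEFAULT_ON;

	if (has_elevator && elevator_disables_wbt)
		enable = false;
	return enable;
}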
diff --git a/block/fops.c b/block/fops.c
index 9871bd6052b4..a286bf3325c5 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -505,7 +505,7 @@ static int blkdev_open(struct inode *inode, struct file *filp)
* during an unstable branch.
*/
filp->f_flags |= O_LARGEFILE;
- filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
+ filp->f_mode |= FMODE_BUF_RASYNC;
/*
* Use the file private data to store the holder for exclusive opens.
@@ -519,6 +519,9 @@ static int blkdev_open(struct inode *inode, struct file *filp)
if (IS_ERR(bdev))
return PTR_ERR(bdev);
+ if (bdev_nowait(bdev))
+ filp->f_mode |= FMODE_NOWAIT;
+
filp->f_mapping = bdev->bd_inode->i_mapping;
filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
return 0;
@@ -595,21 +598,9 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
goto reexpand; /* skip atime */
if (iocb->ki_flags & IOCB_DIRECT) {
- struct address_space *mapping = iocb->ki_filp->f_mapping;
-
- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (filemap_range_needs_writeback(mapping, pos,
- pos + count - 1)) {
- ret = -EAGAIN;
- goto reexpand;
- }
- } else {
- ret = filemap_write_and_wait_range(mapping, pos,
- pos + count - 1);
- if (ret < 0)
- goto reexpand;
- }
-
+ ret = kiocb_write_and_wait(iocb, count);
+ if (ret < 0)
+ goto reexpand;
file_accessed(iocb->ki_filp);
ret = blkdev_direct_IO(iocb, to);
@@ -697,6 +688,16 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
return error;
}
+static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct inode *bd_inode = bdev_file_inode(file);
+
+ if (bdev_read_only(I_BDEV(bd_inode)))
+ return generic_file_readonly_mmap(file, vma);
+
+ return generic_file_mmap(file, vma);
+}
+
const struct file_operations def_blk_fops = {
.open = blkdev_open,
.release = blkdev_release,
@@ -704,7 +705,7 @@ const struct file_operations def_blk_fops = {
.read_iter = blkdev_read_iter,
.write_iter = blkdev_write_iter,
.iopoll = iocb_bio_iopoll,
- .mmap = generic_file_mmap,
+ .mmap = blkdev_mmap,
.fsync = blkdev_fsync,
.unlocked_ioctl = blkdev_ioctl,
#ifdef CONFIG_COMPAT
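Note: the fops.c changes make FMODE_NOWAIT conditional on bdev_nowait(), replace the open-coded writeback handling in blkdev_read_iter() with kiocb_write_and_wait(), and route mmap of read-only devices through generic_file_readonly_mmap(). Below is a hedged sketch of the behavior kiocb_write_and_wait() consolidates: under IOCB_NOWAIT a direct read must fail with -EAGAIN if the range still needs writeback, otherwise it flushes and waits. The helpers here are stubs, not the kernel functions:

#include <errno.h>
#include <stdbool.h>

/* Stub: a real implementation checks for dirty or writeback pages. */
static bool range_needs_writeback(long long pos, long long count)
{
	(void)pos; (void)count;
	return false;
}

/* Stub: a real implementation starts writeback and waits for it. */
static int write_and_wait_range(long long pos, long long count)
{
	(void)pos; (void)count;
	return 0;
}

/* Analogue of kiocb_write_and_wait(): never block under NOWAIT. */
static int write_and_wait(bool nowait, long long pos, long long count)
{
	if (nowait)
		return range_needs_writeback(pos, count) ? -EAGAIN : 0;
	return write_and_wait_range(pos, count);
}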