diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/blk-cgroup.c | 41 | ||||
-rw-r--r-- | block/blk-core.c | 2 | ||||
-rw-r--r-- | block/blk-map.c | 2 | ||||
-rw-r--r-- | block/blk-mq-tag.c | 12 | ||||
-rw-r--r-- | block/blk-mq.c | 8 | ||||
-rw-r--r-- | block/blk-settings.c | 3 | ||||
-rw-r--r-- | block/blk-wbt.c | 12 | ||||
-rw-r--r-- | block/fops.c | 35 |
8 files changed, 73 insertions, 42 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index c8b28ec5dde9..fc49be622e05 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -34,6 +34,8 @@ #include "blk-ioprio.h" #include "blk-throttle.h" +static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu); + /* * blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation. * blkcg_pol_register_mutex nests outside of it and synchronizes entire @@ -56,6 +58,8 @@ static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */ bool blkcg_debug_stats = false; +static DEFINE_RAW_SPINLOCK(blkg_stat_lock); + #define BLKG_DESTROY_BATCH_SIZE 64 /* @@ -163,10 +167,20 @@ static void blkg_free(struct blkcg_gq *blkg) static void __blkg_release(struct rcu_head *rcu) { struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head); + struct blkcg *blkcg = blkg->blkcg; + int cpu; #ifdef CONFIG_BLK_CGROUP_PUNT_BIO WARN_ON(!bio_list_empty(&blkg->async_bios)); #endif + /* + * Flush all the non-empty percpu lockless lists before releasing + * us, given these stat belongs to us. + * + * blkg_stat_lock is for serializing blkg stat update + */ + for_each_possible_cpu(cpu) + __blkcg_rstat_flush(blkcg, cpu); /* release the blkcg and parent blkg refs this blkg has been holding */ css_put(&blkg->blkcg->css); @@ -965,16 +979,12 @@ static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur, u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); } -static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) +static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu) { - struct blkcg *blkcg = css_to_blkcg(css); struct llist_head *lhead = per_cpu_ptr(blkcg->lhead, cpu); struct llist_node *lnode; struct blkg_iostat_set *bisc, *next_bisc; - - /* Root-level stats are sourced from system-wide IO stats */ - if (!cgroup_parent(css->cgroup)) - return; + unsigned long flags; rcu_read_lock(); @@ -983,6 +993,14 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) goto out; /* + * For covering concurrent parent blkg update from blkg_release(). + * + * When flushing from cgroup, cgroup_rstat_lock is always held, so + * this lock won't cause contention most of time. + */ + raw_spin_lock_irqsave(&blkg_stat_lock, flags); + + /* * Iterate only the iostat_cpu's queued in the lockless list. */ llist_for_each_entry_safe(bisc, next_bisc, lnode, lnode) { @@ -1005,13 +1023,19 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) if (parent && parent->parent) blkcg_iostat_update(parent, &blkg->iostat.cur, &blkg->iostat.last); - percpu_ref_put(&blkg->refcnt); } - + raw_spin_unlock_irqrestore(&blkg_stat_lock, flags); out: rcu_read_unlock(); } +static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) +{ + /* Root-level stats are sourced from system-wide IO stats */ + if (cgroup_parent(css->cgroup)) + __blkcg_rstat_flush(css_to_blkcg(css), cpu); +} + /* * We source root cgroup stats from the system-wide stats to avoid * tracking the same information twice and incurring overhead when no @@ -2092,7 +2116,6 @@ void blk_cgroup_bio_start(struct bio *bio) llist_add(&bis->lnode, lhead); WRITE_ONCE(bis->lqueued, true); - percpu_ref_get(&bis->blkg->refcnt); } u64_stats_update_end_irqrestore(&bis->sync, flags); diff --git a/block/blk-core.c b/block/blk-core.c index 2ae22bebeb3e..3fc68b944479 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -521,7 +521,7 @@ static inline int bio_check_eod(struct bio *bio) sector_t maxsector = bdev_nr_sectors(bio->bi_bdev); unsigned int nr_sectors = bio_sectors(bio); - if (nr_sectors && maxsector && + if (nr_sectors && (nr_sectors > maxsector || bio->bi_iter.bi_sector > maxsector - nr_sectors)) { pr_info_ratelimited("%s: attempt to access beyond end of device\n" diff --git a/block/blk-map.c b/block/blk-map.c index 3551c3ff17cf..44d74a30ddac 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -248,7 +248,7 @@ static struct bio *blk_rq_map_bio_alloc(struct request *rq, { struct bio *bio; - if (rq->cmd_flags & REQ_ALLOC_CACHE) { + if (rq->cmd_flags & REQ_ALLOC_CACHE && (nr_vecs <= BIO_INLINE_VECS)) { bio = bio_alloc_bioset(NULL, nr_vecs, rq->cmd_flags, gfp_mask, &fs_bio_set); if (!bio) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 426197312069..cc57e2dd9a0b 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -40,16 +40,20 @@ void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) unsigned int users; struct blk_mq_tags *tags = hctx->tags; + /* + * calling test_bit() prior to test_and_set_bit() is intentional, + * it avoids dirtying the cacheline if the queue is already active. + */ if (blk_mq_is_shared_tags(hctx->flags)) { struct request_queue *q = hctx->queue; - if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) + if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) || + test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) return; - set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags); } else { - if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) + if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) || + test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) return; - set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state); } spin_lock_irq(&tags->lock); diff --git a/block/blk-mq.c b/block/blk-mq.c index 32e50bc0cbb0..98eb31ff914d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -688,6 +688,10 @@ static void __blk_mq_free_request(struct request *rq) blk_crypto_free_request(rq); blk_pm_mark_last_busy(rq); rq->mq_hctx = NULL; + + if (rq->rq_flags & RQF_MQ_INFLIGHT) + __blk_mq_dec_active_requests(hctx); + if (rq->tag != BLK_MQ_NO_TAG) blk_mq_put_tag(hctx->tags, ctx, rq->tag); if (sched_tag != BLK_MQ_NO_TAG) @@ -699,15 +703,11 @@ static void __blk_mq_free_request(struct request *rq) void blk_mq_free_request(struct request *rq) { struct request_queue *q = rq->q; - struct blk_mq_hw_ctx *hctx = rq->mq_hctx; if ((rq->rq_flags & RQF_USE_SCHED) && q->elevator->type->ops.finish_request) q->elevator->type->ops.finish_request(rq); - if (rq->rq_flags & RQF_MQ_INFLIGHT) - __blk_mq_dec_active_requests(hctx); - if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq))) laptop_io_completion(q->disk->bdi); diff --git a/block/blk-settings.c b/block/blk-settings.c index 896b4654ab00..4dd59059b788 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -915,6 +915,7 @@ static bool disk_has_partitions(struct gendisk *disk) void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model) { struct request_queue *q = disk->queue; + unsigned int old_model = q->limits.zoned; switch (model) { case BLK_ZONED_HM: @@ -952,7 +953,7 @@ void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model) */ blk_queue_zone_write_granularity(q, queue_logical_block_size(q)); - } else { + } else if (old_model != BLK_ZONED_NONE) { disk_clear_zone_settings(disk); } } diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 9f7c99c025f3..0bb613139bec 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -713,14 +713,16 @@ void wbt_enable_default(struct gendisk *disk) { struct request_queue *q = disk->queue; struct rq_qos *rqos; - bool disable_flag = q->elevator && - test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags); + bool enable = IS_ENABLED(CONFIG_BLK_WBT_MQ); + + if (q->elevator && + test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags)) + enable = false; /* Throttling already enabled? */ rqos = wbt_rq_qos(q); if (rqos) { - if (!disable_flag && - RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT) + if (enable && RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT) RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT; return; } @@ -729,7 +731,7 @@ void wbt_enable_default(struct gendisk *disk) if (!blk_queue_registered(q)) return; - if (queue_is_mq(q) && !disable_flag) + if (queue_is_mq(q) && enable) wbt_init(disk); } EXPORT_SYMBOL_GPL(wbt_enable_default); diff --git a/block/fops.c b/block/fops.c index 9871bd6052b4..a286bf3325c5 100644 --- a/block/fops.c +++ b/block/fops.c @@ -505,7 +505,7 @@ static int blkdev_open(struct inode *inode, struct file *filp) * during an unstable branch. */ filp->f_flags |= O_LARGEFILE; - filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC; + filp->f_mode |= FMODE_BUF_RASYNC; /* * Use the file private data to store the holder for exclusive openes. @@ -519,6 +519,9 @@ static int blkdev_open(struct inode *inode, struct file *filp) if (IS_ERR(bdev)) return PTR_ERR(bdev); + if (bdev_nowait(bdev)) + filp->f_mode |= FMODE_NOWAIT; + filp->f_mapping = bdev->bd_inode->i_mapping; filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping); return 0; @@ -595,21 +598,9 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) goto reexpand; /* skip atime */ if (iocb->ki_flags & IOCB_DIRECT) { - struct address_space *mapping = iocb->ki_filp->f_mapping; - - if (iocb->ki_flags & IOCB_NOWAIT) { - if (filemap_range_needs_writeback(mapping, pos, - pos + count - 1)) { - ret = -EAGAIN; - goto reexpand; - } - } else { - ret = filemap_write_and_wait_range(mapping, pos, - pos + count - 1); - if (ret < 0) - goto reexpand; - } - + ret = kiocb_write_and_wait(iocb, count); + if (ret < 0) + goto reexpand; file_accessed(iocb->ki_filp); ret = blkdev_direct_IO(iocb, to); @@ -697,6 +688,16 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, return error; } +static int blkdev_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct inode *bd_inode = bdev_file_inode(file); + + if (bdev_read_only(I_BDEV(bd_inode))) + return generic_file_readonly_mmap(file, vma); + + return generic_file_mmap(file, vma); +} + const struct file_operations def_blk_fops = { .open = blkdev_open, .release = blkdev_release, @@ -704,7 +705,7 @@ const struct file_operations def_blk_fops = { .read_iter = blkdev_read_iter, .write_iter = blkdev_write_iter, .iopoll = iocb_bio_iopoll, - .mmap = generic_file_mmap, + .mmap = blkdev_mmap, .fsync = blkdev_fsync, .unlocked_ioctl = blkdev_ioctl, #ifdef CONFIG_COMPAT |