summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2023-03-27 09:49:51 +0900
committerDavid Sterba <dsterba@suse.com>2023-04-17 18:01:22 +0200
commit3480373ebdf7625ee29bee6508c9fc4ae70c00bf (patch)
treedddeb5c65b5a3d8dd35e31347916ac7d4846ce7b /block
parent0a0596fbbe5bddd28b1dfae7e7ecb6d70bdbf059 (diff)
downloadlwn-3480373ebdf7625ee29bee6508c9fc4ae70c00bf.tar.gz
lwn-3480373ebdf7625ee29bee6508c9fc4ae70c00bf.zip
btrfs, block: move REQ_CGROUP_PUNT to btrfs
REQ_CGROUP_PUNT is a bit annoying as it is hard to follow and adds a branch to the bio submission hot path. To fix this, export blkcg_punt_bio_submit and let btrfs call it directly. Add a new REQ_FS_PRIVATE flag for btrfs to indicate to it's own low-level bio submission code that a punt to the cgroup submission helper is required. Reviewed-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'block')
-rw-r--r--block/blk-cgroup.c31
-rw-r--r--block/blk-cgroup.h12
-rw-r--r--block/blk-core.c3
3 files changed, 17 insertions, 29 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index bd50b55bdb61..9f5f3263c178 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1688,24 +1688,27 @@ out_unlock:
}
EXPORT_SYMBOL_GPL(blkcg_policy_unregister);
-bool __blkcg_punt_bio_submit(struct bio *bio)
+/*
+ * When a shared kthread issues a bio for a cgroup, doing so synchronously can
+ * lead to priority inversions as the kthread can be trapped waiting for that
+ * cgroup. Use this helper instead of submit_bio to punt the actual issuing to
+ * a dedicated per-blkcg work item to avoid such priority inversions.
+ */
+void blkcg_punt_bio_submit(struct bio *bio)
{
struct blkcg_gq *blkg = bio->bi_blkg;
- /* consume the flag first */
- bio->bi_opf &= ~REQ_CGROUP_PUNT;
-
- /* never bounce for the root cgroup */
- if (!blkg->parent)
- return false;
-
- spin_lock_bh(&blkg->async_bio_lock);
- bio_list_add(&blkg->async_bios, bio);
- spin_unlock_bh(&blkg->async_bio_lock);
-
- queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
- return true;
+ if (blkg->parent) {
+ spin_lock_bh(&blkg->async_bio_lock);
+ bio_list_add(&blkg->async_bios, bio);
+ spin_unlock_bh(&blkg->async_bio_lock);
+ queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
+ } else {
+ /* never bounce for the root cgroup */
+ submit_bio(bio);
+ }
}
+EXPORT_SYMBOL_GPL(blkcg_punt_bio_submit);
/*
* Scale the accumulated delay based on how long it has been since we updated
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 9c5078755e5e..64758ab9f1f1 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -375,16 +375,6 @@ static inline void blkg_put(struct blkcg_gq *blkg)
if (((d_blkg) = blkg_lookup(css_to_blkcg(pos_css), \
(p_blkg)->q)))
-bool __blkcg_punt_bio_submit(struct bio *bio);
-
-static inline bool blkcg_punt_bio_submit(struct bio *bio)
-{
- if (bio->bi_opf & REQ_CGROUP_PUNT)
- return __blkcg_punt_bio_submit(bio);
- else
- return false;
-}
-
static inline void blkcg_bio_issue_init(struct bio *bio)
{
bio_issue_init(&bio->bi_issue, bio_sectors(bio));
@@ -506,8 +496,6 @@ static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return
static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }
-
-static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
static inline void blkcg_bio_issue_init(struct bio *bio) { }
static inline void blk_cgroup_bio_start(struct bio *bio) { }
static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; }
diff --git a/block/blk-core.c b/block/blk-core.c
index 42926e6cb83c..478978dcb2bd 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -830,9 +830,6 @@ EXPORT_SYMBOL(submit_bio_noacct);
*/
void submit_bio(struct bio *bio)
{
- if (blkcg_punt_bio_submit(bio))
- return;
-
if (bio_op(bio) == REQ_OP_READ) {
task_io_account_read(bio->bi_iter.bi_size);
count_vm_events(PGPGIN, bio_sectors(bio));