author     Johannes Weiner <hannes@cmpxchg.org>  2019-08-08 15:03:00 -0400
committer  Jens Axboe <axboe@kernel.dk>          2019-08-14 08:50:01 -0600
commit     b8e24a9300b0836a9d39f6b20746766b3b81f1bd (patch)
tree       2b992e084f3e88eca44d34c427745fed595e6149 /block
parent     73d9c8d4c0017e21e1ff519474ceb1450484dc9a (diff)
block: annotate refault stalls from IO submission
psi tracks the time tasks wait for refaulting pages to become uptodate, but it does not track the time spent submitting the IO. The submission part can be significant if backing storage is contended or when cgroup throttling (io.latency) is in effect - a lot of time is spent in submit_bio(). In that case, we underreport memory pressure.

Annotate submit_bio() to account submission time as memory stall when the bio is reading userspace workingset pages.

Tested-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
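For context, the psi annotation applied by this patch follows a simple enter/leave pattern around the region whose time should be charged as a memory stall. A minimal sketch of that pattern, assuming only the psi_memstall_enter()/psi_memstall_leave() interfaces from <linux/psi.h>; the surrounding function and the stalling work are illustrative, not part of this change:

#include <linux/psi.h>

/*
 * Sketch: charge the time spent in a potentially stalling operation
 * as a memory stall, but only when the operation is known to involve
 * refaulting workingset pages.
 */
static void annotated_operation(bool workingset_read)
{
	unsigned long pflags;

	if (workingset_read)
		psi_memstall_enter(&pflags);	/* start counting as memory stall */

	/* ... the work that may stall, e.g. submitting the read ... */

	if (workingset_read)
		psi_memstall_leave(&pflags);	/* stop counting */
}

The diff below applies this pattern around generic_make_request() in submit_bio().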
Diffstat (limited to 'block')
-rw-r--r--  block/bio.c       3
-rw-r--r--  block/blk-core.c  23
2 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/block/bio.c b/block/bio.c
index 24a496f5d2e2..54769659a434 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -806,6 +806,9 @@ void __bio_add_page(struct bio *bio, struct page *page,
	bio->bi_iter.bi_size += len;
	bio->bi_vcnt++;
+
+	if (!bio_flagged(bio, BIO_WORKINGSET) && unlikely(PageWorkingset(page)))
+		bio_set_flag(bio, BIO_WORKINGSET);
}
EXPORT_SYMBOL_GPL(__bio_add_page);
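The hunk above turns per-page state into per-bio state: as soon as one page with PageWorkingset() set is added, the whole bio is flagged BIO_WORKINGSET, and submit_bio() (below) uses that flag to decide whether submission time counts as a memory stall. A minimal sketch of a caller, with the function name and page array purely illustrative (real callers first ensure the bio has room for another vector, omitted here):

#include <linux/bio.h>
#include <linux/mm.h>		/* PAGE_SIZE, PageWorkingset() */

/* Illustrative read path: add pages to a bio one by one. */
static void build_read_bio_example(struct bio *bio, struct page **pages,
				   unsigned int nr_pages)
{
	unsigned int i;

	for (i = 0; i < nr_pages; i++)
		__bio_add_page(bio, pages[i], PAGE_SIZE, 0);

	/*
	 * If any of the pages was a refaulting workingset page,
	 * bio_flagged(bio, BIO_WORKINGSET) is now true.
	 */
}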
diff --git a/block/blk-core.c b/block/blk-core.c
index 919629ce4015..834aea04718f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -36,6 +36,7 @@
#include <linux/blk-cgroup.h>
#include <linux/debugfs.h>
#include <linux/bpf.h>
+#include <linux/psi.h>
#define CREATE_TRACE_POINTS
#include <trace/events/block.h>
@@ -1134,6 +1135,10 @@ EXPORT_SYMBOL_GPL(direct_make_request);
*/
blk_qc_t submit_bio(struct bio *bio)
{
+	bool workingset_read = false;
+	unsigned long pflags;
+	blk_qc_t ret;
+
	if (blkcg_punt_bio_submit(bio))
		return BLK_QC_T_NONE;
@@ -1152,6 +1157,8 @@ blk_qc_t submit_bio(struct bio *bio)
		if (op_is_write(bio_op(bio))) {
			count_vm_events(PGPGOUT, count);
		} else {
+			if (bio_flagged(bio, BIO_WORKINGSET))
+				workingset_read = true;
			task_io_account_read(bio->bi_iter.bi_size);
			count_vm_events(PGPGIN, count);
		}
@@ -1166,7 +1173,21 @@ blk_qc_t submit_bio(struct bio *bio)
		}
	}
-	return generic_make_request(bio);
+	/*
+	 * If we're reading data that is part of the userspace
+	 * workingset, count submission time as memory stall. When the
+	 * device is congested, or the submitting cgroup IO-throttled,
+	 * submission can be a significant part of overall IO time.
+	 */
+	if (workingset_read)
+		psi_memstall_enter(&pflags);
+
+	ret = generic_make_request(bio);
+
+	if (workingset_read)
+		psi_memstall_leave(&pflags);
+
+	return ret;
}
EXPORT_SYMBOL(submit_bio);
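No new interface is added by this patch; the extra stall time simply feeds into the existing pressure metrics, e.g. /proc/pressure/memory system-wide and memory.pressure in cgroup2. A minimal user-space sketch that dumps the system-wide numbers, assuming only the standard psi procfs file:

#include <stdio.h>

/* Print /proc/pressure/memory, where submission-time stalls accounted
 * by this patch are folded into the existing "some"/"full" totals. */
int main(void)
{
	FILE *f = fopen("/proc/pressure/memory", "r");
	char line[256];

	if (!f) {
		perror("/proc/pressure/memory");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}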