From 86ff7c2a80cd357f6156a53b354f6a0b357dc0c9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 30 Jan 2018 22:04:57 -0500 Subject: blk-mq: introduce BLK_STS_DEV_RESOURCE This status is returned from driver to block layer if device related resource is unavailable, but driver can guarantee that IO dispatch will be triggered in future when the resource is available. Convert some drivers to return BLK_STS_DEV_RESOURCE. Also, if driver returns BLK_STS_RESOURCE and SCHED_RESTART is set, rerun queue after a delay (BLK_MQ_DELAY_QUEUE) to avoid IO stalls. BLK_MQ_DELAY_QUEUE is 3 ms because both scsi-mq and nvmefc are using that magic value. If a driver can make sure there is in-flight IO, it is safe to return BLK_STS_DEV_RESOURCE because: 1) If all in-flight IOs complete before examining SCHED_RESTART in blk_mq_dispatch_rq_list(), SCHED_RESTART must be cleared, so queue is run immediately in this case by blk_mq_dispatch_rq_list(); 2) if there is any in-flight IO after/when examining SCHED_RESTART in blk_mq_dispatch_rq_list(): - if SCHED_RESTART isn't set, queue is run immediately as handled in 1) - otherwise, this request will be dispatched after any in-flight IO is completed via blk_mq_sched_restart() 3) if SCHED_RESTART is set concurently in context because of BLK_STS_RESOURCE, blk_mq_delay_run_hw_queue() will cover the above two cases and make sure IO hang can be avoided. One invariant is that queue will be rerun if SCHED_RESTART is set. Suggested-by: Jens Axboe Tested-by: Laurence Oberman Signed-off-by: Ming Lei Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index c5d3db0d83f8..bf18b95ed92d 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -39,6 +39,24 @@ typedef u8 __bitwise blk_status_t; #define BLK_STS_AGAIN ((__force blk_status_t)12) +/* + * BLK_STS_DEV_RESOURCE is returned from the driver to the block layer if + * device related resources are unavailable, but the driver can guarantee + * that the queue will be rerun in the future once resources become + * available again. This is typically the case for device specific + * resources that are consumed for IO. If the driver fails allocating these + * resources, we know that inflight (or pending) IO will free these + * resource upon completion. + * + * This is different from BLK_STS_RESOURCE in that it explicitly references + * a device specific resource. For resources of wider scope, allocation + * failure can happen without having pending IO. This means that we can't + * rely on request completions freeing these resources, as IO may not be in + * flight. Examples of that are kernel memory allocations, DMA mappings, or + * any other system wide resources. + */ +#define BLK_STS_DEV_RESOURCE ((__force blk_status_t)13) + /** * blk_path_error - returns true if error may be path related * @error: status the request was completed with -- cgit v1.2.3 From 60f91826ca62bcf85d6d5fc90941337282787671 Mon Sep 17 00:00:00 2001 From: Kemi Wang Date: Tue, 24 Oct 2017 09:16:42 +0800 Subject: buffer: Avoid setting buffer bits that are already set It's expensive to set buffer flags that are already set, because that causes a costly cache line transition. A common case is setting the "verified" flag during ext4 writes. This patch checks for the flag being set first. With the AIM7/creat-clo benchmark testing on a 48G ramdisk based-on ext4 file system, we see 3.3%(15431->15936) improvement of aim7.jobs-per-min on a 2-sockets broadwell platform. What the benchmark does is: it forks 3000 processes, and each process do the following: a) open a new file b) close the file c) delete the file until loop=100*1000 times. The original patch is contributed by Andi Kleen. Signed-off-by: Andi Kleen Tested-by: Kemi Wang Signed-off-by: Kemi Wang Signed-off-by: Jens Axboe --- include/linux/buffer_head.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 8b1bf8d3d4a2..06797ef10fd9 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -81,11 +81,14 @@ struct buffer_head { /* * macro tricks to expand the set_buffer_foo(), clear_buffer_foo() * and buffer_foo() functions. + * To avoid reset buffer flags that are already set, because that causes + * a costly cache line transition, check the flag first. */ #define BUFFER_FNS(bit, name) \ static __always_inline void set_buffer_##name(struct buffer_head *bh) \ { \ - set_bit(BH_##bit, &(bh)->b_state); \ + if (!test_bit(BH_##bit, &(bh)->b_state)) \ + set_bit(BH_##bit, &(bh)->b_state); \ } \ static __always_inline void clear_buffer_##name(struct buffer_head *bh) \ { \ -- cgit v1.2.3