diff options
author | Gioh Kim <gi-oh.kim@cloud.ionos.com> | 2021-04-19 09:37:16 +0200 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2021-04-20 08:59:04 -0600 |
commit | 2958a995edc94654df690318df7b9b49e5a3ef88 (patch) | |
tree | fddb6e4b38e52bfcc2a984940fa0b65e61292252 /drivers/block/rnbd | |
parent | 12b06533104e802df73c1fbe159437c19933d6c0 (diff) | |
download | lwn-2958a995edc94654df690318df7b9b49e5a3ef88.tar.gz lwn-2958a995edc94654df690318df7b9b49e5a3ef88.zip |
block/rnbd-clt: Support polling mode for IO latency optimization
RNBD can create two sets of queues, one for irq-mode and one for poll-mode.
For example, on a 4-CPU system 8 request-queues are created,
4 for irq-mode and 4 for poll-mode.
If the IO has the HIPRI flag, the block-layer will call the .poll function
of RNBD. Then the IO is sent to the poll-mode queue.
Add an optional nr_poll_queues argument to the map_device interface.
To support polling of RNBD, the RTRS client creates connections
for both irq-mode and direct-poll-mode.
For example, on a 4-CPU system it could have created 5 connections:
con[0] => user message (softirq cq)
con[1:4] => softirq cq
After this patch, it can create 9 connections:
con[0] => user message (softirq cq)
con[1:4] => softirq cq
con[5:8] => DIRECT-POLL cq
Cc: Leon Romanovsky <leonro@nvidia.com>
Cc: linux-rdma@vger.kernel.org
Signed-off-by: Gioh Kim <gi-oh.kim@ionos.com>
Signed-off-by: Jack Wang <jinpu.wang@ionos.com>
Acked-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Link: https://lore.kernel.org/r/20210419073722.15351-14-gi-oh.kim@ionos.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'drivers/block/rnbd')
-rw-r--r-- | drivers/block/rnbd/rnbd-clt-sysfs.c | 55 | ||||
-rw-r--r-- | drivers/block/rnbd/rnbd-clt.c | 89 | ||||
-rw-r--r-- | drivers/block/rnbd/rnbd-clt.h | 5 |
3 files changed, 129 insertions, 20 deletions
diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c index 49015f428e67..2b6305ecfd5f 100644 --- a/drivers/block/rnbd/rnbd-clt-sysfs.c +++ b/drivers/block/rnbd/rnbd-clt-sysfs.c @@ -34,6 +34,7 @@ enum { RNBD_OPT_DEV_PATH = 1 << 2, RNBD_OPT_ACCESS_MODE = 1 << 3, RNBD_OPT_SESSNAME = 1 << 6, + RNBD_OPT_NR_POLL_QUEUES = 1 << 7, }; static const unsigned int rnbd_opt_mandatory[] = { @@ -42,12 +43,13 @@ static const unsigned int rnbd_opt_mandatory[] = { }; static const match_table_t rnbd_opt_tokens = { - {RNBD_OPT_PATH, "path=%s" }, - {RNBD_OPT_DEV_PATH, "device_path=%s"}, - {RNBD_OPT_DEST_PORT, "dest_port=%d" }, - {RNBD_OPT_ACCESS_MODE, "access_mode=%s"}, - {RNBD_OPT_SESSNAME, "sessname=%s" }, - {RNBD_OPT_ERR, NULL }, + {RNBD_OPT_PATH, "path=%s" }, + {RNBD_OPT_DEV_PATH, "device_path=%s" }, + {RNBD_OPT_DEST_PORT, "dest_port=%d" }, + {RNBD_OPT_ACCESS_MODE, "access_mode=%s" }, + {RNBD_OPT_SESSNAME, "sessname=%s" }, + {RNBD_OPT_NR_POLL_QUEUES, "nr_poll_queues=%d" }, + {RNBD_OPT_ERR, NULL }, }; struct rnbd_map_options { @@ -57,6 +59,7 @@ struct rnbd_map_options { char *pathname; u16 *dest_port; enum rnbd_access_mode *access_mode; + u32 *nr_poll_queues; }; static int rnbd_clt_parse_map_options(const char *buf, size_t max_path_cnt, @@ -68,7 +71,7 @@ static int rnbd_clt_parse_map_options(const char *buf, size_t max_path_cnt, int opt_mask = 0; int token; int ret = -EINVAL; - int i, dest_port; + int i, dest_port, nr_poll_queues; int p_cnt = 0; options = kstrdup(buf, GFP_KERNEL); @@ -178,6 +181,19 @@ static int rnbd_clt_parse_map_options(const char *buf, size_t max_path_cnt, kfree(p); break; + case RNBD_OPT_NR_POLL_QUEUES: + if (match_int(args, &nr_poll_queues) || nr_poll_queues < -1 || + nr_poll_queues > (int)nr_cpu_ids) { + pr_err("bad nr_poll_queues parameter '%d'\n", + nr_poll_queues); + ret = -EINVAL; + goto out; + } + if (nr_poll_queues == -1) + nr_poll_queues = nr_cpu_ids; + *opt->nr_poll_queues = nr_poll_queues; + break; + default: 
pr_err("map_device: Unknown parameter or missing value '%s'\n", p); @@ -227,6 +243,19 @@ static ssize_t state_show(struct kobject *kobj, static struct kobj_attribute rnbd_clt_state_attr = __ATTR_RO(state); +static ssize_t nr_poll_queues_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + struct rnbd_clt_dev *dev; + + dev = container_of(kobj, struct rnbd_clt_dev, kobj); + + return sysfs_emit(page, "%d\n", dev->nr_poll_queues); +} + +static struct kobj_attribute rnbd_clt_nr_poll_queues = + __ATTR_RO(nr_poll_queues); + static ssize_t mapping_path_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { @@ -421,6 +450,7 @@ static struct attribute *rnbd_dev_attrs[] = { &rnbd_clt_state_attr.attr, &rnbd_clt_session_attr.attr, &rnbd_clt_access_mode.attr, + &rnbd_clt_nr_poll_queues.attr, NULL, }; @@ -469,7 +499,7 @@ static ssize_t rnbd_clt_map_device_show(struct kobject *kobj, char *page) { return scnprintf(page, PAGE_SIZE, - "Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n", + "Usage: echo \"[dest_port=server port number] sessname=<name of the rtrs session> path=<[srcaddr@]dstaddr> [path=<[srcaddr@]dstaddr>] device_path=<full path on remote side> [access_mode=<ro|rw|migration>] [nr_poll_queues=<number of queues>]\" > %s\n\naddr ::= [ ip:<ipv4> | ip:<ipv6> | gid:<gid> ]\n", attr->attr.name); } @@ -541,6 +571,7 @@ static ssize_t rnbd_clt_map_device_store(struct kobject *kobj, char sessname[NAME_MAX]; enum rnbd_access_mode access_mode = RNBD_ACCESS_RW; u16 port_nr = RTRS_PORT; + u32 nr_poll_queues = 0; struct sockaddr_storage *addrs; struct rtrs_addr paths[6]; @@ -552,6 +583,7 @@ static ssize_t rnbd_clt_map_device_store(struct kobject *kobj, opt.pathname = pathname; opt.dest_port = &port_nr; opt.access_mode = &access_mode; + 
opt.nr_poll_queues = &nr_poll_queues; addrs = kcalloc(ARRAY_SIZE(paths) * 2, sizeof(*addrs), GFP_KERNEL); if (!addrs) return -ENOMEM; @@ -565,12 +597,13 @@ static ssize_t rnbd_clt_map_device_store(struct kobject *kobj, if (ret) goto out; - pr_info("Mapping device %s on session %s, (access_mode: %s)\n", + pr_info("Mapping device %s on session %s, (access_mode: %s, nr_poll_queues: %d)\n", pathname, sessname, - rnbd_access_mode_str(access_mode)); + rnbd_access_mode_str(access_mode), + nr_poll_queues); dev = rnbd_clt_map_device(sessname, paths, path_cnt, port_nr, pathname, - access_mode); + access_mode, nr_poll_queues); if (IS_ERR(dev)) { ret = PTR_ERR(dev); goto out; diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 9b44aac680d5..ea98124e8ce9 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1165,9 +1165,54 @@ static blk_status_t rnbd_queue_rq(struct blk_mq_hw_ctx *hctx, return ret; } +static int rnbd_rdma_poll(struct blk_mq_hw_ctx *hctx) +{ + struct rnbd_queue *q = hctx->driver_data; + struct rnbd_clt_dev *dev = q->dev; + int cnt; + + cnt = rtrs_clt_rdma_cq_direct(dev->sess->rtrs, hctx->queue_num); + return cnt; +} + +static int rnbd_rdma_map_queues(struct blk_mq_tag_set *set) +{ + struct rnbd_clt_session *sess = set->driver_data; + + /* shared read/write queues */ + set->map[HCTX_TYPE_DEFAULT].nr_queues = num_online_cpus(); + set->map[HCTX_TYPE_DEFAULT].queue_offset = 0; + set->map[HCTX_TYPE_READ].nr_queues = num_online_cpus(); + set->map[HCTX_TYPE_READ].queue_offset = 0; + blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]); + blk_mq_map_queues(&set->map[HCTX_TYPE_READ]); + + if (sess->nr_poll_queues) { + /* dedicated queue for poll */ + set->map[HCTX_TYPE_POLL].nr_queues = sess->nr_poll_queues; + set->map[HCTX_TYPE_POLL].queue_offset = set->map[HCTX_TYPE_READ].queue_offset + + set->map[HCTX_TYPE_READ].nr_queues; + blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]); + pr_info("[session=%s] mapped %d/%d/%d 
default/read/poll queues.\n", + sess->sessname, + set->map[HCTX_TYPE_DEFAULT].nr_queues, + set->map[HCTX_TYPE_READ].nr_queues, + set->map[HCTX_TYPE_POLL].nr_queues); + } else { + pr_info("[session=%s] mapped %d/%d default/read queues.\n", + sess->sessname, + set->map[HCTX_TYPE_DEFAULT].nr_queues, + set->map[HCTX_TYPE_READ].nr_queues); + } + + return 0; +} + static struct blk_mq_ops rnbd_mq_ops = { .queue_rq = rnbd_queue_rq, .complete = rnbd_softirq_done_fn, + .map_queues = rnbd_rdma_map_queues, + .poll = rnbd_rdma_poll, }; static int setup_mq_tags(struct rnbd_clt_session *sess) @@ -1181,7 +1226,15 @@ static int setup_mq_tags(struct rnbd_clt_session *sess) tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_TAG_QUEUE_SHARED; tag_set->cmd_size = sizeof(struct rnbd_iu) + RNBD_RDMA_SGL_SIZE; - tag_set->nr_hw_queues = num_online_cpus(); + + /* for HCTX_TYPE_DEFAULT, HCTX_TYPE_READ, HCTX_TYPE_POLL */ + tag_set->nr_maps = sess->nr_poll_queues ? HCTX_MAX_TYPES : 2; + /* + * HCTX_TYPE_DEFAULT and HCTX_TYPE_READ share one set of queues + * others are for HCTX_TYPE_POLL + */ + tag_set->nr_hw_queues = num_online_cpus() + sess->nr_poll_queues; + tag_set->driver_data = sess; return blk_mq_alloc_tag_set(tag_set); } @@ -1189,7 +1242,7 @@ static int setup_mq_tags(struct rnbd_clt_session *sess) static struct rnbd_clt_session * find_and_get_or_create_sess(const char *sessname, const struct rtrs_addr *paths, - size_t path_cnt, u16 port_nr) + size_t path_cnt, u16 port_nr, u32 nr_poll_queues) { struct rnbd_clt_session *sess; struct rtrs_attrs attrs; @@ -1198,6 +1251,17 @@ find_and_get_or_create_sess(const char *sessname, struct rtrs_clt_ops rtrs_ops; sess = find_or_create_sess(sessname, &first); + if (sess == ERR_PTR(-ENOMEM)) + return ERR_PTR(-ENOMEM); + else if ((nr_poll_queues && !first) || (!nr_poll_queues && sess->nr_poll_queues)) { + /* + * A device MUST have its own session to use the polling-mode. + * It must fail to map new device with the same session. 
+ */ + err = -EINVAL; + goto put_sess; + } + if (!first) return sess; @@ -1219,7 +1283,7 @@ find_and_get_or_create_sess(const char *sessname, 0, /* Do not use pdu of rtrs */ RECONNECT_DELAY, BMAX_SEGMENTS, BLK_MAX_SEGMENT_SIZE, - MAX_RECONNECTS); + MAX_RECONNECTS, nr_poll_queues); if (IS_ERR(sess->rtrs)) { err = PTR_ERR(sess->rtrs); goto wake_up_and_put; @@ -1227,6 +1291,7 @@ find_and_get_or_create_sess(const char *sessname, rtrs_clt_query(sess->rtrs, &attrs); sess->max_io_size = attrs.max_io_size; sess->queue_depth = attrs.queue_depth; + sess->nr_poll_queues = nr_poll_queues; err = setup_mq_tags(sess); if (err) @@ -1370,7 +1435,8 @@ static int rnbd_client_setup_device(struct rnbd_clt_dev *dev) static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess, enum rnbd_access_mode access_mode, - const char *pathname) + const char *pathname, + u32 nr_poll_queues) { struct rnbd_clt_dev *dev; int ret; @@ -1379,7 +1445,12 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess, if (!dev) return ERR_PTR(-ENOMEM); - dev->hw_queues = kcalloc(nr_cpu_ids, sizeof(*dev->hw_queues), + /* + * nr_cpu_ids: the number of softirq queues + * nr_poll_queues: the number of polling queues + */ + dev->hw_queues = kcalloc(nr_cpu_ids + nr_poll_queues, + sizeof(*dev->hw_queues), GFP_KERNEL); if (!dev->hw_queues) { ret = -ENOMEM; @@ -1405,6 +1476,7 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess, dev->clt_device_id = ret; dev->sess = sess; dev->access_mode = access_mode; + dev->nr_poll_queues = nr_poll_queues; mutex_init(&dev->lock); refcount_set(&dev->refcount, 1); dev->dev_state = DEV_STATE_INIT; @@ -1491,7 +1563,8 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname, struct rtrs_addr *paths, size_t path_cnt, u16 port_nr, const char *pathname, - enum rnbd_access_mode access_mode) + enum rnbd_access_mode access_mode, + u32 nr_poll_queues) { struct rnbd_clt_session *sess; struct rnbd_clt_dev *dev; @@ -1500,11 +1573,11 @@ struct rnbd_clt_dev 
*rnbd_clt_map_device(const char *sessname, if (unlikely(exists_devpath(pathname, sessname))) return ERR_PTR(-EEXIST); - sess = find_and_get_or_create_sess(sessname, paths, path_cnt, port_nr); + sess = find_and_get_or_create_sess(sessname, paths, path_cnt, port_nr, nr_poll_queues); if (IS_ERR(sess)) return ERR_CAST(sess); - dev = init_dev(sess, access_mode, pathname); + dev = init_dev(sess, access_mode, pathname, nr_poll_queues); if (IS_ERR(dev)) { pr_err("map_device: failed to map device '%s' from session %s, can't initialize device, err: %ld\n", pathname, sess->sessname, PTR_ERR(dev)); diff --git a/drivers/block/rnbd/rnbd-clt.h b/drivers/block/rnbd/rnbd-clt.h index 714d426b449b..451e7383738f 100644 --- a/drivers/block/rnbd/rnbd-clt.h +++ b/drivers/block/rnbd/rnbd-clt.h @@ -90,6 +90,7 @@ struct rnbd_clt_session { int queue_depth; u32 max_io_size; struct blk_mq_tag_set tag_set; + u32 nr_poll_queues; struct mutex lock; /* protects state and devs_list */ struct list_head devs_list; /* list of struct rnbd_clt_dev */ refcount_t refcount; @@ -118,6 +119,7 @@ struct rnbd_clt_dev { enum rnbd_clt_dev_state dev_state; char *pathname; enum rnbd_access_mode access_mode; + u32 nr_poll_queues; bool read_only; bool rotational; bool wc; @@ -147,7 +149,8 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname, struct rtrs_addr *paths, size_t path_cnt, u16 port_nr, const char *pathname, - enum rnbd_access_mode access_mode); + enum rnbd_access_mode access_mode, + u32 nr_poll_queues); int rnbd_clt_unmap_device(struct rnbd_clt_dev *dev, bool force, const struct attribute *sysfs_self); |