From 9195b317f1b62e9efd67d8702c920156135a81b5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 23 Feb 2023 18:13:00 -0800 Subject: nbd: allow genl access outside init_net NBD doesn't have much to do with networking, allow users outside init_net to access the family. Reviewed-by: Josef Bacik Signed-off-by: Jakub Kicinski Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20230224021301.1630703-1-kuba@kernel.org Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/block') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 592cfa8b765a..53e4bb754fd9 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -2325,6 +2325,7 @@ static struct genl_family nbd_genl_family __ro_after_init = { .n_small_ops = ARRAY_SIZE(nbd_connect_genl_ops), .resv_start_op = NBD_CMD_STATUS + 1, .maxattr = NBD_ATTR_MAX, + .netnsok = 1, .policy = nbd_attr_policy, .mcgrps = nbd_mcast_grps, .n_mcgrps = ARRAY_SIZE(nbd_mcast_grps), -- cgit v1.2.3 From 7399b886b55ef7b489a1bd3939451fcf1792941d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 23 Feb 2023 18:13:01 -0800 Subject: nbd: use the structured req attr check Use the macro for checking presence of required attributes. It has the advantage of reporting to the user which attr was missing in a machine-readable format (extack). Reviewed-by: Josef Bacik Signed-off-by: Jakub Kicinski Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20230224021301.1630703-2-kuba@kernel.org Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 53e4bb754fd9..c0b1611b9665 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1934,11 +1934,11 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } } - if (!info->attrs[NBD_ATTR_SOCKETS]) { + if (GENL_REQ_ATTR_CHECK(info, NBD_ATTR_SOCKETS)) { pr_err("must specify at least one socket\n"); return -EINVAL; } - if (!info->attrs[NBD_ATTR_SIZE_BYTES]) { + if (GENL_REQ_ATTR_CHECK(info, NBD_ATTR_SIZE_BYTES)) { pr_err("must specify a size in bytes for the device\n"); return -EINVAL; } @@ -2123,7 +2123,7 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) if (!netlink_capable(skb, CAP_SYS_ADMIN)) return -EPERM; - if (!info->attrs[NBD_ATTR_INDEX]) { + if (GENL_REQ_ATTR_CHECK(info, NBD_ATTR_INDEX)) { pr_err("must specify an index to disconnect\n"); return -EINVAL; } @@ -2161,7 +2161,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) if (!netlink_capable(skb, CAP_SYS_ADMIN)) return -EPERM; - if (!info->attrs[NBD_ATTR_INDEX]) { + if (GENL_REQ_ATTR_CHECK(info, NBD_ATTR_INDEX)) { pr_err("must specify a device to reconfigure\n"); return -EINVAL; } -- cgit v1.2.3 From fbb5615f9f812acb660b8bd7b51e57ddbdbc7315 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Thu, 30 Mar 2023 11:49:25 -0700 Subject: null_blk: use non-deprecated lib functions Use library helper memcpy_page() to copy source page into destination instead of having duplicate code in copy_to_nullb() & copy_from_nullb(). In copy_from_nullb() also replace the memset call with zero_user(). Use library helper memset_page() to set the buffer to 0xFF instead of having duplicate code. This also removes deprecated API kmap_atomic() from copy_to_nullb() copy_from_nullb() and nullb_fill_pattern(), from :include/linux/highmem.h: "kmap_atomic - Atomically map a page for temporary usage - Deprecated!" Signed-off-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20230330184926.64209-2-kch@nvidia.com Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 4c601ca9552a..87edb6d71798 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1112,7 +1112,6 @@ static int copy_to_nullb(struct nullb *nullb, struct page *source, size_t temp, count = 0; unsigned int offset; struct nullb_page *t_page; - void *dst, *src; while (count < n) { temp = min_t(size_t, nullb->dev->blocksize, n - count); @@ -1126,11 +1125,7 @@ static int copy_to_nullb(struct nullb *nullb, struct page *source, if (!t_page) return -ENOSPC; - src = kmap_atomic(source); - dst = kmap_atomic(t_page->page); - memcpy(dst + offset, src + off + count, temp); - kunmap_atomic(dst); - kunmap_atomic(src); + memcpy_page(t_page->page, offset, source, off + count, temp); __set_bit(sector & SECTOR_MASK, t_page->bitmap); @@ -1149,7 +1144,6 @@ static int copy_from_nullb(struct nullb *nullb, struct page *dest, size_t temp, count = 0; unsigned int offset; struct nullb_page *t_page; - void *dst, *src; while (count < n) { temp = min_t(size_t, nullb->dev->blocksize, n - count); @@ -1158,16 +1152,11 @@ static int copy_from_nullb(struct nullb *nullb, struct page *dest, t_page = null_lookup_page(nullb, sector, false, !null_cache_active(nullb)); - dst = kmap_atomic(dest); - if (!t_page) { - memset(dst + off + count, 0, temp); - goto next; - } - src = kmap_atomic(t_page->page); - memcpy(dst + off + count, src + offset, temp); - kunmap_atomic(src); -next: - kunmap_atomic(dst); + if (t_page) + memcpy_page(dest, off + count, t_page->page, offset, + temp); + else + zero_user(dest, off + count, temp); count += temp; sector += temp >> SECTOR_SHIFT; @@ -1178,11 +1167,7 @@ next: static void nullb_fill_pattern(struct nullb *nullb, struct page *page, unsigned int len, unsigned int off) { - void *dst; - - dst = kmap_atomic(page); - memset(dst + off, 0xFF, len); - kunmap_atomic(dst); + memset_page(page, off, 0xff, len); } blk_status_t null_handle_discard(struct nullb_device *dev, -- cgit v1.2.3 From acc3c8799b9723d0b6a8cd67f8036dfdaa280825 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Thu, 30 Mar 2023 11:49:26 -0700 Subject: null_blk: use kmap_local_page() and kunmap_local() Replace the deprecated API kmap_atomic() and kunmap_atomic() with kmap_local_page() and kunmap_local() in null_flush_cache_page(). Signed-off-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20230330184926.64209-2-kch@nvidia.com Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 87edb6d71798..1d914a82f863 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1030,8 +1030,8 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page) if (!t_page) return -ENOMEM; - src = kmap_atomic(c_page->page); - dst = kmap_atomic(t_page->page); + src = kmap_local_page(c_page->page); + dst = kmap_local_page(t_page->page); for (i = 0; i < PAGE_SECTORS; i += (nullb->dev->blocksize >> SECTOR_SHIFT)) { @@ -1043,8 +1043,8 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page) } } - kunmap_atomic(dst); - kunmap_atomic(src); + kunmap_local(dst); + kunmap_local(src); ret = radix_tree_delete_item(&nullb->dev->cache, idx, c_page); null_free_page(ret); -- cgit v1.2.3 From 33f7d31673eb43298b25b0cca30acc487e8a332a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 30 Mar 2023 12:27:39 +0200 Subject: drbd: Rip out the ERR_IF_CNT_IS_NEGATIVE macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Andreas Gruenbacher Signed-off-by: Christoph Böhmwalder Link: https://lore.kernel.org/r/20230330102744.2128122-2-christoph.boehmwalder@linbit.com Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index d89b7d03d4c8..772023ace749 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1918,18 +1918,14 @@ static inline void inc_ap_pending(struct drbd_device *device) atomic_inc(&device->ap_pending_cnt); } -#define ERR_IF_CNT_IS_NEGATIVE(which, func, line) \ - if (atomic_read(&device->which) < 0) \ - drbd_err(device, "in %s:%d: " #which " = %d < 0 !\n", \ - func, line, \ - atomic_read(&device->which)) - -#define dec_ap_pending(device) _dec_ap_pending(device, __func__, __LINE__) -static inline void _dec_ap_pending(struct drbd_device *device, const char *func, int line) +#define dec_ap_pending(device) ((void)expect((device), __dec_ap_pending(device) >= 0)) +static inline int __dec_ap_pending(struct drbd_device *device) { - if (atomic_dec_and_test(&device->ap_pending_cnt)) + int ap_pending_cnt = atomic_dec_return(&device->ap_pending_cnt); + + if (ap_pending_cnt == 0) wake_up(&device->misc_wait); - ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt, func, line); + return ap_pending_cnt; } /* counts how many resync-related answers we still expect from the peer @@ -1943,11 +1939,10 @@ static inline void inc_rs_pending(struct drbd_device *device) atomic_inc(&device->rs_pending_cnt); } -#define dec_rs_pending(device) _dec_rs_pending(device, __func__, __LINE__) -static inline void _dec_rs_pending(struct drbd_device *device, const char *func, int line) +#define dec_rs_pending(device) ((void)expect((device), __dec_rs_pending(device) >= 0)) +static inline int __dec_rs_pending(struct drbd_device *device) { - atomic_dec(&device->rs_pending_cnt); - ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt, func, line); + return atomic_dec_return(&device->rs_pending_cnt); } /* counts how many answers we still need to send to the peer. @@ -1964,18 +1959,16 @@ static inline void inc_unacked(struct drbd_device *device) atomic_inc(&device->unacked_cnt); } -#define dec_unacked(device) _dec_unacked(device, __func__, __LINE__) -static inline void _dec_unacked(struct drbd_device *device, const char *func, int line) +#define dec_unacked(device) ((void)expect(device, __dec_unacked(device) >= 0)) +static inline int __dec_unacked(struct drbd_device *device) { - atomic_dec(&device->unacked_cnt); - ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); + return atomic_dec_return(&device->unacked_cnt); } -#define sub_unacked(device, n) _sub_unacked(device, n, __func__, __LINE__) -static inline void _sub_unacked(struct drbd_device *device, int n, const char *func, int line) +#define sub_unacked(device, n) ((void)expect(device, __sub_unacked(device) >= 0)) +static inline int __sub_unacked(struct drbd_device *device, int n) { - atomic_sub(n, &device->unacked_cnt); - ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); + return atomic_sub_return(n, &device->unacked_cnt); } static inline bool is_sync_target_state(enum drbd_conns connection_state) -- cgit v1.2.3 From 8164dd6c8ae158ec0740bf37f0f14645a1fb5355 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 30 Mar 2023 12:27:40 +0200 Subject: drbd: Add peer device parameter to whole-bitmap I/O handlers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass a peer device parameter through the bitmap I/O functions to the I/O handlers. In after_state_ch(), set that parameter when queuing the drbd_send_bitmap operation so that this operation knows where to send the bitmap. Signed-off-by: Andreas Gruenbacher Signed-off-by: Christoph Böhmwalder Link: https://lore.kernel.org/r/20230330102744.2128122-2-christoph.boehmwalder@linbit.com Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_bitmap.c | 13 ++++++--- drivers/block/drbd/drbd_int.h | 39 ++++++++++++++++---------- drivers/block/drbd/drbd_main.c | 57 +++++++++++++++++++++++--------------- drivers/block/drbd/drbd_nl.c | 19 +++++++------ drivers/block/drbd/drbd_receiver.c | 8 +++--- drivers/block/drbd/drbd_state.c | 19 +++++++------ drivers/block/drbd/drbd_worker.c | 3 +- 7 files changed, 96 insertions(+), 62 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 289876ffbc31..6ac8c54b44c7 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1216,7 +1216,9 @@ static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned * drbd_bm_read() - Read the whole bitmap from its on disk location. * @device: DRBD device. */ -int drbd_bm_read(struct drbd_device *device) __must_hold(local) +int drbd_bm_read(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local) + { return bm_rw(device, BM_AIO_READ, 0); } @@ -1227,7 +1229,8 @@ int drbd_bm_read(struct drbd_device *device) __must_hold(local) * * Will only write pages that have changed since last IO. */ -int drbd_bm_write(struct drbd_device *device) __must_hold(local) +int drbd_bm_write(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local) { return bm_rw(device, 0, 0); } @@ -1238,7 +1241,8 @@ int drbd_bm_write(struct drbd_device *device) __must_hold(local) * * Will write all pages. */ -int drbd_bm_write_all(struct drbd_device *device) __must_hold(local) +int drbd_bm_write_all(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local) { return bm_rw(device, BM_AIO_WRITE_ALL_PAGES, 0); } @@ -1264,7 +1268,8 @@ int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_ho * verify is aborted due to a failed peer disk, while local IO continues, or * pending resync acks are still being processed. */ -int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local) +int drbd_bm_write_copy_pages(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local) { return bm_rw(device, BM_AIO_COPY_PAGES, 0); } diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 772023ace749..7bd10089bfc9 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -66,6 +66,7 @@ extern int drbd_proc_details; struct drbd_device; struct drbd_connection; +struct drbd_peer_device; /* Defines to control fault insertion */ enum { @@ -541,9 +542,10 @@ struct drbd_md_io { struct bm_io_work { struct drbd_work w; + struct drbd_peer_device *peer_device; char *why; enum bm_flag flags; - int (*io_fn)(struct drbd_device *device); + int (*io_fn)(struct drbd_device *device, struct drbd_peer_device *peer_device); void (*done)(struct drbd_device *device, int rv); }; @@ -1041,7 +1043,7 @@ extern int drbd_send_drequest_csum(struct drbd_peer_device *, sector_t sector, enum drbd_packet cmd); extern int drbd_send_ov_request(struct drbd_peer_device *, sector_t sector, int size); -extern int drbd_send_bitmap(struct drbd_device *device); +extern int drbd_send_bitmap(struct drbd_device *device, struct drbd_peer_device *peer_device); extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode); extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode); extern int drbd_send_rs_deallocated(struct drbd_peer_device *, struct drbd_peer_request *); @@ -1065,17 +1067,22 @@ extern void drbd_md_clear_flag(struct drbd_device *device, int flags)__must_hold extern int drbd_md_test_flag(struct drbd_backing_dev *, int); extern void drbd_md_mark_dirty(struct drbd_device *device); extern void drbd_queue_bitmap_io(struct drbd_device *device, - int (*io_fn)(struct drbd_device *), + int (*io_fn)(struct drbd_device *, struct drbd_peer_device *), void (*done)(struct drbd_device *, int), - char *why, enum bm_flag flags); + char *why, enum bm_flag flags, + struct drbd_peer_device *peer_device); extern int drbd_bitmap_io(struct drbd_device *device, - int (*io_fn)(struct drbd_device *), - char *why, enum bm_flag flags); + int (*io_fn)(struct drbd_device *, struct drbd_peer_device *), + char *why, enum bm_flag flags, + struct drbd_peer_device *peer_device); extern int drbd_bitmap_io_from_worker(struct drbd_device *device, - int (*io_fn)(struct drbd_device *), - char *why, enum bm_flag flags); -extern int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local); -extern int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local); + int (*io_fn)(struct drbd_device *, struct drbd_peer_device *), + char *why, enum bm_flag flags, + struct drbd_peer_device *peer_device); +extern int drbd_bmio_set_n_write(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local); +extern int drbd_bmio_clear_n_write(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local); /* Meta data layout * @@ -1284,14 +1291,18 @@ extern void _drbd_bm_set_bits(struct drbd_device *device, const unsigned long s, const unsigned long e); extern int drbd_bm_test_bit(struct drbd_device *device, unsigned long bitnr); extern int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr); -extern int drbd_bm_read(struct drbd_device *device) __must_hold(local); +extern int drbd_bm_read(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local); extern void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr); -extern int drbd_bm_write(struct drbd_device *device) __must_hold(local); +extern int drbd_bm_write(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local); extern void drbd_bm_reset_al_hints(struct drbd_device *device) __must_hold(local); extern int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local); extern int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local); -extern int drbd_bm_write_all(struct drbd_device *device) __must_hold(local); -extern int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local); +extern int drbd_bm_write_all(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local); +extern int drbd_bm_write_copy_pages(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local); extern size_t drbd_bm_words(struct drbd_device *device); extern unsigned long drbd_bm_bits(struct drbd_device *device); extern sector_t drbd_bm_capacity(struct drbd_device *device); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2c764f7ee4a7..178a7ae40af8 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1198,10 +1198,11 @@ static int fill_bitmap_rle_bits(struct drbd_device *device, * code upon failure. */ static int -send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c) +send_bitmap_rle_or_plain(struct drbd_peer_device *peer_device, struct bm_xfer_ctx *c) { - struct drbd_socket *sock = &first_peer_device(device)->connection->data; - unsigned int header_size = drbd_header_size(first_peer_device(device)->connection); + struct drbd_device *device = peer_device->device; + struct drbd_socket *sock = &peer_device->connection->data; + unsigned int header_size = drbd_header_size(peer_device->connection); struct p_compressed_bm *p = sock->sbuf + header_size; int len, err; @@ -1212,7 +1213,7 @@ send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c) if (len) { dcbp_set_code(p, RLE_VLI_Bits); - err = __send_command(first_peer_device(device)->connection, device->vnr, sock, + err = __send_command(peer_device->connection, device->vnr, sock, P_COMPRESSED_BITMAP, sizeof(*p) + len, NULL, 0); c->packets[0]++; @@ -1233,7 +1234,8 @@ send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c) len = num_words * sizeof(*p); if (len) drbd_bm_get_lel(device, c->word_offset, num_words, p); - err = __send_command(first_peer_device(device)->connection, device->vnr, sock, P_BITMAP, len, NULL, 0); + err = __send_command(peer_device->connection, device->vnr, sock, P_BITMAP, + len, NULL, 0); c->word_offset += num_words; c->bit_offset = c->word_offset * BITS_PER_LONG; @@ -1254,7 +1256,8 @@ send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c) } /* See the comment at receive_bitmap() */ -static int _drbd_send_bitmap(struct drbd_device *device) +static int _drbd_send_bitmap(struct drbd_device *device, + struct drbd_peer_device *peer_device) { struct bm_xfer_ctx c; int err; @@ -1266,7 +1269,7 @@ static int _drbd_send_bitmap(struct drbd_device *device) if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC)) { drbd_info(device, "Writing the whole bitmap, MDF_FullSync was set.\n"); drbd_bm_set_all(device); - if (drbd_bm_write(device)) { + if (drbd_bm_write(device, peer_device)) { /* write_bm did fail! Leave full sync flag set in Meta P_DATA * but otherwise process as per normal - need to tell other * side that a full resync is required! */ @@ -1285,20 +1288,20 @@ static int _drbd_send_bitmap(struct drbd_device *device) }; do { - err = send_bitmap_rle_or_plain(device, &c); + err = send_bitmap_rle_or_plain(peer_device, &c); } while (err > 0); return err == 0; } -int drbd_send_bitmap(struct drbd_device *device) +int drbd_send_bitmap(struct drbd_device *device, struct drbd_peer_device *peer_device) { - struct drbd_socket *sock = &first_peer_device(device)->connection->data; + struct drbd_socket *sock = &peer_device->connection->data; int err = -1; mutex_lock(&sock->mutex); if (sock->socket) - err = !_drbd_send_bitmap(device); + err = !_drbd_send_bitmap(device, peer_device); mutex_unlock(&sock->mutex); return err; } @@ -3406,7 +3409,9 @@ void drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local) * * Sets all bits in the bitmap and writes the whole bitmap to stable storage. */ -int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local) +int drbd_bmio_set_n_write(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local) + { int rv = -EIO; @@ -3414,7 +3419,7 @@ int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local) drbd_md_sync(device); drbd_bm_set_all(device); - rv = drbd_bm_write(device); + rv = drbd_bm_write(device, peer_device); if (!rv) { drbd_md_clear_flag(device, MDF_FULL_SYNC); @@ -3430,11 +3435,13 @@ int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local) * * Clears all bits in the bitmap and writes the whole bitmap to stable storage. */ -int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local) +int drbd_bmio_clear_n_write(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local) + { drbd_resume_al(device); drbd_bm_clear_all(device); - return drbd_bm_write(device); + return drbd_bm_write(device, peer_device); } static int w_bitmap_io(struct drbd_work *w, int unused) @@ -3453,7 +3460,7 @@ static int w_bitmap_io(struct drbd_work *w, int unused) if (get_ldev(device)) { drbd_bm_lock(device, work->why, work->flags); - rv = work->io_fn(device); + rv = work->io_fn(device, work->peer_device); drbd_bm_unlock(device); put_ldev(device); } @@ -3488,11 +3495,12 @@ static int w_bitmap_io(struct drbd_work *w, int unused) * put_ldev(). */ void drbd_queue_bitmap_io(struct drbd_device *device, - int (*io_fn)(struct drbd_device *), + int (*io_fn)(struct drbd_device *, struct drbd_peer_device *), void (*done)(struct drbd_device *, int), - char *why, enum bm_flag flags) + char *why, enum bm_flag flags, + struct drbd_peer_device *peer_device) { - D_ASSERT(device, current == first_peer_device(device)->connection->worker.task); + D_ASSERT(device, current == peer_device->connection->worker.task); D_ASSERT(device, !test_bit(BITMAP_IO_QUEUED, &device->flags)); D_ASSERT(device, !test_bit(BITMAP_IO, &device->flags)); @@ -3501,6 +3509,7 @@ void drbd_queue_bitmap_io(struct drbd_device *device, drbd_err(device, "FIXME going to queue '%s' but '%s' still pending?\n", why, device->bm_io_work.why); + device->bm_io_work.peer_device = peer_device; device->bm_io_work.io_fn = io_fn; device->bm_io_work.done = done; device->bm_io_work.why = why; @@ -3512,7 +3521,7 @@ void drbd_queue_bitmap_io(struct drbd_device *device, * application IO does not conflict anyways. */ if (flags == BM_LOCKED_CHANGE_ALLOWED || atomic_read(&device->ap_bio_cnt) == 0) { if (!test_and_set_bit(BITMAP_IO_QUEUED, &device->flags)) - drbd_queue_work(&first_peer_device(device)->connection->sender_work, + drbd_queue_work(&peer_device->connection->sender_work, &device->bm_io_work.w); } spin_unlock_irq(&device->resource->req_lock); @@ -3528,8 +3537,10 @@ void drbd_queue_bitmap_io(struct drbd_device *device, * freezes application IO while that the actual IO operations runs. This * functions MAY NOT be called from worker context. */ -int drbd_bitmap_io(struct drbd_device *device, int (*io_fn)(struct drbd_device *), - char *why, enum bm_flag flags) +int drbd_bitmap_io(struct drbd_device *device, + int (*io_fn)(struct drbd_device *, struct drbd_peer_device *), + char *why, enum bm_flag flags, + struct drbd_peer_device *peer_device) { /* Only suspend io, if some operation is supposed to be locked out */ const bool do_suspend_io = flags & (BM_DONT_CLEAR|BM_DONT_SET|BM_DONT_TEST); @@ -3541,7 +3552,7 @@ int drbd_bitmap_io(struct drbd_device *device, int (*io_fn)(struct drbd_device * drbd_suspend_io(device); drbd_bm_lock(device, why, flags); - rv = io_fn(device); + rv = io_fn(device, peer_device); drbd_bm_unlock(device); if (do_suspend_io) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 60757ac31701..8967298968f3 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1053,7 +1053,7 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct la_size_changed ? "size changed" : "md moved"); /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */ drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write, - "size changed", BM_LOCKED_MASK); + "size changed", BM_LOCKED_MASK, NULL); /* on-disk bitmap and activity log is authoritative again * (unless there was an IO error meanwhile...) */ @@ -2027,13 +2027,15 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) drbd_info(device, "Assuming that all blocks are out of sync " "(aka FullSync)\n"); if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, - "set_n_write from attaching", BM_LOCKED_MASK)) { + "set_n_write from attaching", BM_LOCKED_MASK, + NULL)) { retcode = ERR_IO_MD_DISK; goto force_diskless_dec; } } else { if (drbd_bitmap_io(device, &drbd_bm_read, - "read from attaching", BM_LOCKED_MASK)) { + "read from attaching", BM_LOCKED_MASK, + NULL)) { retcode = ERR_IO_MD_DISK; goto force_diskless_dec; } @@ -2972,7 +2974,7 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT)); if (retcode >= SS_SUCCESS) { if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, - "set_n_write from invalidate", BM_LOCKED_MASK)) + "set_n_write from invalidate", BM_LOCKED_MASK, NULL)) retcode = ERR_IO_MD_DISK; } } else @@ -3005,11 +3007,12 @@ out: return 0; } -static int drbd_bmio_set_susp_al(struct drbd_device *device) __must_hold(local) +static int drbd_bmio_set_susp_al(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local) { int rv; - rv = drbd_bmio_set_n_write(device); + rv = drbd_bmio_set_n_write(device, peer_device); drbd_suspend_al(device); return rv; } @@ -3052,7 +3055,7 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) if (retcode >= SS_SUCCESS) { if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al, "set_n_write from invalidate_peer", - BM_LOCKED_SET_ALLOWED)) + BM_LOCKED_SET_ALLOWED, NULL)) retcode = ERR_IO_MD_DISK; } } else @@ -4148,7 +4151,7 @@ int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info) if (args.clear_bm) { err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write, - "clear_n_write from new_c_uuid", BM_LOCKED_MASK); + "clear_n_write from new_c_uuid", BM_LOCKED_MASK, NULL); if (err) { drbd_err(device, "Writing bitmap failed with %d\n", err); retcode = ERR_IO_MD_DISK; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 757f4692b5bd..e70076fe1f2e 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3591,7 +3591,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device, if (abs(hg) >= 2) { drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n"); if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake", - BM_LOCKED_SET_ALLOWED)) + BM_LOCKED_SET_ALLOWED, NULL)) return C_MASK; } @@ -4270,7 +4270,7 @@ static int receive_uuids(struct drbd_connection *connection, struct packet_info drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n"); drbd_bitmap_io(device, &drbd_bmio_clear_n_write, "clear_n_write from receive_uuids", - BM_LOCKED_TEST_ALLOWED); + BM_LOCKED_TEST_ALLOWED, NULL); _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]); _drbd_uuid_set(device, UI_BITMAP, 0); _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), @@ -4877,7 +4877,7 @@ static int receive_bitmap(struct drbd_connection *connection, struct packet_info if (device->state.conn == C_WF_BITMAP_T) { enum drbd_state_rv rv; - err = drbd_send_bitmap(device); + err = drbd_send_bitmap(device, peer_device); if (err) goto out; /* Omit CS_ORDERED with this state transition to avoid deadlocks. */ @@ -5214,7 +5214,7 @@ static int drbd_disconnected(struct drbd_peer_device *peer_device) if (get_ldev(device)) { drbd_bitmap_io(device, &drbd_bm_write_copy_pages, - "write from disconnected", BM_LOCKED_CHANGE_ALLOWED); + "write from disconnected", BM_LOCKED_CHANGE_ALLOWED, NULL); put_ldev(device); } diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 75d13ea0024f..c92dc6093b0a 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1518,8 +1518,9 @@ static void abw_start_sync(struct drbd_device *device, int rv) } int drbd_bitmap_io_from_worker(struct drbd_device *device, - int (*io_fn)(struct drbd_device *), - char *why, enum bm_flag flags) + int (*io_fn)(struct drbd_device *, struct drbd_peer_device *), + char *why, enum bm_flag flags, + struct drbd_peer_device *peer_device) { int rv; @@ -1529,7 +1530,7 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device, atomic_inc(&device->suspend_cnt); drbd_bm_lock(device, why, flags); - rv = io_fn(device); + rv = io_fn(device, peer_device); drbd_bm_unlock(device); drbd_resume_io(device); @@ -1809,7 +1810,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, device->state.conn == C_WF_BITMAP_S) drbd_queue_bitmap_io(device, &drbd_send_bitmap, NULL, "send_bitmap (WFBitMapS)", - BM_LOCKED_TEST_ALLOWED); + BM_LOCKED_TEST_ALLOWED, peer_device); /* Lost contact to peer's copy of the data */ if (lost_contact_to_peer_data(os.pdsk, ns.pdsk)) { @@ -1839,7 +1840,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, * No harm done if the bitmap still changes, * redirtied pages will follow later. */ drbd_bitmap_io_from_worker(device, &drbd_bm_write, - "demote diskless peer", BM_LOCKED_SET_ALLOWED); + "demote diskless peer", BM_LOCKED_SET_ALLOWED, peer_device); put_ldev(device); } @@ -1851,7 +1852,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, /* No changes to the bitmap expected this time, so assert that, * even though no harm was done if it did change. */ drbd_bitmap_io_from_worker(device, &drbd_bm_write, - "demote", BM_LOCKED_TEST_ALLOWED); + "demote", BM_LOCKED_TEST_ALLOWED, peer_device); put_ldev(device); } @@ -1888,7 +1889,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, /* no other bitmap changes expected during this phase */ drbd_queue_bitmap_io(device, &drbd_bmio_set_n_write, &abw_start_sync, - "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED); + "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED, + peer_device); /* first half of local IO error, failure to attach, * or administrative detach */ @@ -2011,7 +2013,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, if ((os.conn > C_CONNECTED && os.conn < C_AHEAD) && (ns.conn == C_CONNECTED || ns.conn >= C_AHEAD) && get_ldev(device)) { drbd_queue_bitmap_io(device, &drbd_bm_write_copy_pages, NULL, - "write from resync_finished", BM_LOCKED_CHANGE_ALLOWED); + "write from resync_finished", BM_LOCKED_CHANGE_ALLOWED, + peer_device); put_ldev(device); } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index f46738040d6b..68d5ba4af17d 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1945,6 +1945,7 @@ static void drbd_ldev_destroy(struct drbd_device *device) static void go_diskless(struct drbd_device *device) { + struct drbd_peer_device *peer_device = first_peer_device(device); D_ASSERT(device, device->state.disk == D_FAILED); /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will * inc/dec it frequently. Once we are D_DISKLESS, no one will touch @@ -1970,7 +1971,7 @@ static void go_diskless(struct drbd_device *device) * Any modifications would not be expected anymore, though. */ if (drbd_bitmap_io_from_worker(device, drbd_bm_write, - "detach", BM_LOCKED_TEST_ALLOWED)) { + "detach", BM_LOCKED_TEST_ALLOWED, peer_device)) { if (test_bit(WAS_READ_ERROR, &device->flags)) { drbd_md_set_flag(device, MDF_FULL_SYNC); drbd_md_sync(device); -- cgit v1.2.3 From 5e54c2a6010bc88e33a5a66aa16c95fa5d017065 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 30 Mar 2023 12:27:41 +0200 Subject: drbd: INFO_bm_xfer_stats(): Pass a peer device argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Andreas Gruenbacher Signed-off-by: Christoph Böhmwalder Link: https://lore.kernel.org/r/20230330102744.2128122-2-christoph.boehmwalder@linbit.com Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 4 ++-- drivers/block/drbd/drbd_main.c | 2 +- drivers/block/drbd/drbd_receiver.c | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 7bd10089bfc9..97c091990bf6 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -127,8 +127,8 @@ struct bm_xfer_ctx { unsigned bytes[2]; }; -extern void INFO_bm_xfer_stats(struct drbd_device *device, - const char *direction, struct bm_xfer_ctx *c); +extern void INFO_bm_xfer_stats(struct drbd_peer_device *peer_device, + const char *direction, struct bm_xfer_ctx *c); static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c) { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 178a7ae40af8..6647f84f3879 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1247,7 +1247,7 @@ send_bitmap_rle_or_plain(struct drbd_peer_device *peer_device, struct bm_xfer_ct } if (!err) { if (len == 0) { - INFO_bm_xfer_stats(device, "send", c); + INFO_bm_xfer_stats(peer_device, "send", c); return 0; } else return 1; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e70076fe1f2e..c6f93a9087b1 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4766,11 +4766,11 @@ decode_bitmap_c(struct drbd_peer_device *peer_device, return -EIO; } -void INFO_bm_xfer_stats(struct drbd_device *device, +void INFO_bm_xfer_stats(struct drbd_peer_device *peer_device, const char *direction, struct bm_xfer_ctx *c) { /* what would it take to transfer it "plaintext" */ - unsigned int header_size = drbd_header_size(first_peer_device(device)->connection); + unsigned int header_size = drbd_header_size(peer_device->connection); unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size; unsigned int plain = header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) + @@ -4794,7 +4794,7 @@ void INFO_bm_xfer_stats(struct drbd_device *device, r = 1000; r = 1000 - r; - drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), " + drbd_info(peer_device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), " "total %u; compression: %u.%u%%\n", direction, c->bytes[1], c->packets[1], @@ -4872,7 +4872,7 @@ static int receive_bitmap(struct drbd_connection *connection, struct packet_info goto out; } - INFO_bm_xfer_stats(device, "receive", &c); + INFO_bm_xfer_stats(peer_device, "receive", &c); if (device->state.conn == C_WF_BITMAP_T) { enum drbd_state_rv rv; -- cgit v1.2.3 From db445db1cde540f819265dcae2d916a35616bda0 Mon Sep 17 00:00:00 2001 From: Christoph Böhmwalder Date: Thu, 30 Mar 2023 12:27:42 +0200 Subject: drbd: drbd_uuid_compare: pass a peer_device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christoph Böhmwalder Link: https://lore.kernel.org/r/20230330102744.2128122-2-christoph.boehmwalder@linbit.com Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_receiver.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c6f93a9087b1..e352880c70b5 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3226,10 +3226,11 @@ static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid, -1096 requires proto 96 */ -static int drbd_uuid_compare(struct drbd_device *const device, enum drbd_role const peer_role, int *rule_nr) __must_hold(local) +static int drbd_uuid_compare(struct drbd_peer_device *const peer_device, + enum drbd_role const peer_role, int *rule_nr) __must_hold(local) { - struct drbd_peer_device *const peer_device = first_peer_device(device); - struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL; + struct drbd_connection *const connection = peer_device->connection; + struct drbd_device *device = peer_device->device; u64 self, peer; int i, j; @@ -3465,7 +3466,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device, drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]); - hg = drbd_uuid_compare(device, peer_role, &rule_nr); + hg = drbd_uuid_compare(peer_device, peer_role, &rule_nr); spin_unlock_irq(&device->ldev->md.uuid_lock); drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr); -- cgit v1.2.3 From ad878a0d8815a291a1cbb2dc8279dc2910c999cc Mon Sep 17 00:00:00 2001 From: Christoph Böhmwalder Date: Thu, 30 Mar 2023 12:27:43 +0200 Subject: drbd: pass drbd_peer_device to __req_mod MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation to support multiple connections, we need to know which one we need to modify the request state for. Originally-from: Lars Ellenberg Signed-off-by: Christoph Böhmwalder Link: https://lore.kernel.org/r/20230330102744.2128122-2-christoph.boehmwalder@linbit.com Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_main.c | 13 +++++++++---- drivers/block/drbd/drbd_receiver.c | 18 ++++++++++-------- drivers/block/drbd/drbd_req.c | 21 +++++++++++++-------- drivers/block/drbd/drbd_req.h | 11 +++++++---- drivers/block/drbd/drbd_worker.c | 14 +++++++------- 5 files changed, 46 insertions(+), 31 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 6647f84f3879..83987e7a5ef2 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -231,9 +231,11 @@ void tl_release(struct drbd_connection *connection, unsigned int barrier_nr, } req = list_prepare_entry(tmp, &connection->transfer_log, tl_requests); list_for_each_entry_safe_from(req, r, &connection->transfer_log, tl_requests) { + struct drbd_peer_device *peer_device; if (req->epoch != expect_epoch) break; - _req_mod(req, BARRIER_ACKED); + peer_device = conn_peer_device(connection, req->device->vnr); + _req_mod(req, BARRIER_ACKED, peer_device); } spin_unlock_irq(&connection->resource->req_lock); @@ -256,10 +258,13 @@ bail: /* must hold resource->req_lock */ void _tl_restart(struct drbd_connection *connection, enum drbd_req_event what) { + struct drbd_peer_device *peer_device; struct drbd_request *req, *r; - list_for_each_entry_safe(req, r, &connection->transfer_log, tl_requests) - _req_mod(req, what); + list_for_each_entry_safe(req, r, &connection->transfer_log, tl_requests) { + peer_device = conn_peer_device(connection, req->device->vnr); + _req_mod(req, what, peer_device); + } } void tl_restart(struct drbd_connection *connection, enum drbd_req_event what) @@ -297,7 +302,7 @@ void tl_abort_disk_io(struct drbd_device *device) continue; if (req->device != device) continue; - _req_mod(req, ABORT_DISK_IO); + _req_mod(req, ABORT_DISK_IO, NULL); } spin_unlock_irq(&connection->resource->req_lock); } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e352880c70b5..856c0e3a6630 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2138,7 +2138,7 @@ static int receive_DataReply(struct drbd_connection *connection, struct packet_i err = recv_dless_read(peer_device, req, sector, pi->size); if (!err) - req_mod(req, DATA_RECEIVED); + req_mod(req, DATA_RECEIVED, peer_device); /* else: nothing. handled from drbd_disconnect... * I don't think we may complete this just yet * in case we are "on-disconnect: freeze" */ @@ -2196,7 +2196,7 @@ static void restart_conflicting_writes(struct drbd_device *device, continue; /* as it is RQ_POSTPONED, this will cause it to * be queued on the retry workqueue. */ - __req_mod(req, CONFLICT_RESOLVED, NULL); + __req_mod(req, CONFLICT_RESOLVED, NULL, NULL); } } @@ -2420,6 +2420,7 @@ static blk_opf_t wire_flags_to_bio(struct drbd_connection *connection, u32 dpf) static void fail_postponed_requests(struct drbd_device *device, sector_t sector, unsigned int size) { + struct drbd_peer_device *peer_device = first_peer_device(device); struct drbd_interval *i; repeat: @@ -2433,7 +2434,7 @@ static void fail_postponed_requests(struct drbd_device *device, sector_t sector, if (!(req->rq_state & RQ_POSTPONED)) continue; req->rq_state &= ~RQ_POSTPONED; - __req_mod(req, NEG_ACKED, &m); + __req_mod(req, NEG_ACKED, peer_device, &m); spin_unlock_irq(&device->resource->req_lock); if (m.bio) complete_master_bio(device, &m); @@ -5661,10 +5662,11 @@ static int got_IsInSync(struct drbd_connection *connection, struct packet_info * } static int -validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector, +validate_req_change_req_state(struct drbd_peer_device *peer_device, u64 id, sector_t sector, struct rb_root *root, const char *func, enum drbd_req_event what, bool missing_ok) { + struct drbd_device *device = peer_device->device; struct drbd_request *req; struct bio_and_error m; @@ -5674,7 +5676,7 @@ validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t secto spin_unlock_irq(&device->resource->req_lock); return -EIO; } - __req_mod(req, what, &m); + __req_mod(req, what, peer_device, &m); spin_unlock_irq(&device->resource->req_lock); if (m.bio) @@ -5723,7 +5725,7 @@ static int got_BlockAck(struct drbd_connection *connection, struct packet_info * BUG(); } - return validate_req_change_req_state(device, p->block_id, sector, + return validate_req_change_req_state(peer_device, p->block_id, sector, &device->write_requests, __func__, what, false); } @@ -5750,7 +5752,7 @@ static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi return 0; } - err = validate_req_change_req_state(device, p->block_id, sector, + err = validate_req_change_req_state(peer_device, p->block_id, sector, &device->write_requests, __func__, NEG_ACKED, true); if (err) { @@ -5781,7 +5783,7 @@ static int got_NegDReply(struct drbd_connection *connection, struct packet_info drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n", (unsigned long long)sector, be32_to_cpu(p->blksize)); - return validate_req_change_req_state(device, p->block_id, sector, + return validate_req_change_req_state(peer_device, p->block_id, sector, &device->read_requests, __func__, NEG_ACKED, false); } diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index e36216d50753..528f29ebf369 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -552,12 +552,15 @@ static inline bool is_pending_write_protocol_A(struct drbd_request *req) * happen "atomically" within the req_lock, * and it enforces that we have to think in a very structured manner * about the "events" that may happen to a request during its life time ... + * + * + * peer_device == NULL means local disk */ int __req_mod(struct drbd_request *req, enum drbd_req_event what, + struct drbd_peer_device *peer_device, struct bio_and_error *m) { struct drbd_device *const device = req->device; - struct drbd_peer_device *const peer_device = first_peer_device(device); struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL; struct net_conf *nc; int p, rv = 0; @@ -1100,6 +1103,7 @@ static bool drbd_should_send_out_of_sync(union drbd_dev_state s) static int drbd_process_write_request(struct drbd_request *req) { struct drbd_device *device = req->device; + struct drbd_peer_device *peer_device = first_peer_device(device); int remote, send_oos; remote = drbd_should_do_remote(device->state); @@ -1115,7 +1119,7 @@ static int drbd_process_write_request(struct drbd_request *req) /* The only size==0 bios we expect are empty flushes. */ D_ASSERT(device, req->master_bio->bi_opf & REQ_PREFLUSH); if (remote) - _req_mod(req, QUEUE_AS_DRBD_BARRIER); + _req_mod(req, QUEUE_AS_DRBD_BARRIER, peer_device); return remote; } @@ -1125,10 +1129,10 @@ static int drbd_process_write_request(struct drbd_request *req) D_ASSERT(device, !(remote && send_oos)); if (remote) { - _req_mod(req, TO_BE_SENT); - _req_mod(req, QUEUE_FOR_NET_WRITE); + _req_mod(req, TO_BE_SENT, peer_device); + _req_mod(req, QUEUE_FOR_NET_WRITE, peer_device); } else if (drbd_set_out_of_sync(device, req->i.sector, req->i.size)) - _req_mod(req, QUEUE_FOR_SEND_OOS); + _req_mod(req, QUEUE_FOR_SEND_OOS, peer_device); return remote; } @@ -1312,6 +1316,7 @@ static void drbd_update_plug(struct drbd_plug_cb *plug, struct drbd_request *req static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req) { struct drbd_resource *resource = device->resource; + struct drbd_peer_device *peer_device = first_peer_device(device); const int rw = bio_data_dir(req->master_bio); struct bio_and_error m = { NULL, }; bool no_remote = false; @@ -1375,8 +1380,8 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request /* We either have a private_bio, or we can read from remote. * Otherwise we had done the goto nodata above. */ if (req->private_bio == NULL) { - _req_mod(req, TO_BE_SENT); - _req_mod(req, QUEUE_FOR_NET_READ); + _req_mod(req, TO_BE_SENT, peer_device); + _req_mod(req, QUEUE_FOR_NET_READ, peer_device); } else no_remote = true; } @@ -1397,7 +1402,7 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request req->pre_submit_jif = jiffies; list_add_tail(&req->req_pending_local, &device->pending_completion[rw == WRITE]); - _req_mod(req, TO_BE_SUBMITTED); + _req_mod(req, TO_BE_SUBMITTED, NULL); /* but we need to give up the spinlock to submit */ submit_private_bio = true; } else if (no_remote) { diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index b4017b5c3fbc..9ae860e7591b 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -267,6 +267,7 @@ struct bio_and_error { extern void start_new_tl_epoch(struct drbd_connection *connection); extern void drbd_req_destroy(struct kref *kref); extern int __req_mod(struct drbd_request *req, enum drbd_req_event what, + struct drbd_peer_device *peer_device, struct bio_and_error *m); extern void complete_master_bio(struct drbd_device *device, struct bio_and_error *m); @@ -280,14 +281,15 @@ extern void drbd_restart_request(struct drbd_request *req); /* use this if you don't want to deal with calling complete_master_bio() * outside the spinlock, e.g. when walking some list on cleanup. */ -static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what) +static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what, + struct drbd_peer_device *peer_device) { struct drbd_device *device = req->device; struct bio_and_error m; int rv; /* __req_mod possibly frees req, do not touch req after that! */ - rv = __req_mod(req, what, &m); + rv = __req_mod(req, what, peer_device, &m); if (m.bio) complete_master_bio(device, &m); @@ -299,7 +301,8 @@ static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what) * of the lower level driver completion callback, so we need to * spin_lock_irqsave here. */ static inline int req_mod(struct drbd_request *req, - enum drbd_req_event what) + enum drbd_req_event what, + struct drbd_peer_device *peer_device) { unsigned long flags; struct drbd_device *device = req->device; @@ -307,7 +310,7 @@ static inline int req_mod(struct drbd_request *req, int rv; spin_lock_irqsave(&device->resource->req_lock, flags); - rv = __req_mod(req, what, &m); + rv = __req_mod(req, what, peer_device, &m); spin_unlock_irqrestore(&device->resource->req_lock, flags); if (m.bio) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 68d5ba4af17d..6455edca7aa9 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -276,7 +276,7 @@ void drbd_request_endio(struct bio *bio) /* not req_mod(), we need irqsave here! */ spin_lock_irqsave(&device->resource->req_lock, flags); - __req_mod(req, what, &m); + __req_mod(req, what, NULL, &m); spin_unlock_irqrestore(&device->resource->req_lock, flags); put_ldev(device); @@ -1425,7 +1425,7 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel) int err; if (unlikely(cancel)) { - req_mod(req, SEND_CANCELED); + req_mod(req, SEND_CANCELED, peer_device); return 0; } req->pre_send_jif = jiffies; @@ -1437,7 +1437,7 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel) maybe_send_barrier(connection, req->epoch); err = drbd_send_out_of_sync(peer_device, req); - req_mod(req, OOS_HANDED_TO_NETWORK); + req_mod(req, OOS_HANDED_TO_NETWORK, peer_device); return err; } @@ -1457,7 +1457,7 @@ int w_send_dblock(struct drbd_work *w, int cancel) int err; if (unlikely(cancel)) { - req_mod(req, SEND_CANCELED); + req_mod(req, SEND_CANCELED, peer_device); return 0; } req->pre_send_jif = jiffies; @@ -1467,7 +1467,7 @@ int w_send_dblock(struct drbd_work *w, int cancel) connection->send.current_epoch_writes++; err = drbd_send_dblock(peer_device, req); - req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); + req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK, peer_device); if (do_send_unplug && !err) pd_send_unplug_remote(peer_device); @@ -1490,7 +1490,7 @@ int w_send_read_req(struct drbd_work *w, int cancel) int err; if (unlikely(cancel)) { - req_mod(req, SEND_CANCELED); + req_mod(req, SEND_CANCELED, peer_device); return 0; } req->pre_send_jif = jiffies; @@ -1502,7 +1502,7 @@ int w_send_read_req(struct drbd_work *w, int cancel) err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size, (unsigned long)req); - req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); + req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK, peer_device); if (do_send_unplug && !err) pd_send_unplug_remote(peer_device); -- cgit v1.2.3 From 0d11f3cf279c5ad20a41f29242f170ba3c02f2da Mon Sep 17 00:00:00 2001 From: Christoph Böhmwalder Date: Thu, 30 Mar 2023 12:27:44 +0200 Subject: drbd: Pass a peer device to the resync and online verify functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Originally-from: Andreas Grünbacher Signed-off-by: Christoph Böhmwalder Link: https://lore.kernel.org/r/20230330102744.2128122-2-christoph.boehmwalder@linbit.com Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_actlog.c | 13 +++-- drivers/block/drbd/drbd_int.h | 46 +++++++++--------- drivers/block/drbd/drbd_receiver.c | 59 +++++++++++------------ drivers/block/drbd/drbd_req.c | 9 ++-- drivers/block/drbd/drbd_state.c | 10 ++-- drivers/block/drbd/drbd_worker.c | 97 ++++++++++++++++++++------------------ 6 files changed, 126 insertions(+), 108 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 429255876800..64b3a1c76f03 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -735,8 +735,9 @@ static bool update_rs_extent(struct drbd_device *device, return false; } -void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go) +void drbd_advance_rs_marks(struct drbd_peer_device *peer_device, unsigned long still_to_go) { + struct drbd_device *device = peer_device->device; unsigned long now = jiffies; unsigned long last = device->rs_mark_time[device->rs_last_mark]; int next = (device->rs_last_mark + 1) % DRBD_SYNC_MARKS; @@ -819,7 +820,7 @@ static int update_sync_bits(struct drbd_device *device, if (mode == SET_IN_SYNC) { unsigned long still_to_go = drbd_bm_total_weight(device); bool rs_is_done = (still_to_go <= device->rs_failed); - drbd_advance_rs_marks(device, still_to_go); + drbd_advance_rs_marks(first_peer_device(device), still_to_go); if (cleared || rs_is_done) maybe_schedule_on_disk_bitmap_update(device, rs_is_done); } else if (mode == RECORD_RS_FAILED) @@ -843,10 +844,11 @@ static bool plausible_request_size(int size) * called by worker on C_SYNC_TARGET and receiver on SyncSource. * */ -int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size, +int __drbd_change_sync(struct drbd_peer_device *peer_device, sector_t sector, int size, enum update_sync_bits_mode mode) { /* Is called from worker and receiver context _only_ */ + struct drbd_device *device = peer_device->device; unsigned long sbnr, ebnr, lbnr; unsigned long count = 0; sector_t esector, nr_sectors; @@ -1009,14 +1011,15 @@ retry: * tries to set it to BME_LOCKED. Returns 0 upon success, and -EAGAIN * if there is still application IO going on in this area. */ -int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector) +int drbd_try_rs_begin_io(struct drbd_peer_device *peer_device, sector_t sector) { + struct drbd_device *device = peer_device->device; unsigned int enr = BM_SECT_TO_EXT(sector); const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT; struct lc_element *e; struct bm_extent *bm_ext; int i; - bool throttle = drbd_rs_should_slow_down(device, sector, true); + bool throttle = drbd_rs_should_slow_down(peer_device, sector, true); /* If we need to throttle, a half-locked (only marked BME_NO_WRITES, * not yet BME_LOCKED) extent needs to be kicked out explicitly if we diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 97c091990bf6..a30a5ed811be 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1433,21 +1433,24 @@ void drbd_resync_after_changed(struct drbd_device *device); extern void drbd_start_resync(struct drbd_device *device, enum drbd_conns side); extern void resume_next_sg(struct drbd_device *device); extern void suspend_other_sg(struct drbd_device *device); -extern int drbd_resync_finished(struct drbd_device *device); +extern int drbd_resync_finished(struct drbd_peer_device *peer_device); /* maybe rather drbd_main.c ? */ extern void *drbd_md_get_buffer(struct drbd_device *device, const char *intent); extern void drbd_md_put_buffer(struct drbd_device *device); extern int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bdev, sector_t sector, enum req_op op); -extern void drbd_ov_out_of_sync_found(struct drbd_device *, sector_t, int); +extern void drbd_ov_out_of_sync_found(struct drbd_peer_device *peer_device, + sector_t sector, int size); extern void wait_until_done_or_force_detached(struct drbd_device *device, struct drbd_backing_dev *bdev, unsigned int *done); -extern void drbd_rs_controller_reset(struct drbd_device *device); +extern void drbd_rs_controller_reset(struct drbd_peer_device *peer_device); -static inline void ov_out_of_sync_print(struct drbd_device *device) +static inline void ov_out_of_sync_print(struct drbd_peer_device *peer_device) { + struct drbd_device *device = peer_device->device; + if (device->ov_last_oos_size) { - drbd_err(device, "Out of sync: start=%llu, size=%lu (sectors)\n", + drbd_err(peer_device, "Out of sync: start=%llu, size=%lu (sectors)\n", (unsigned long long)device->ov_last_oos_start, (unsigned long)device->ov_last_oos_size); } @@ -1486,7 +1489,7 @@ extern int drbd_ack_receiver(struct drbd_thread *thi); extern void drbd_send_ping_wf(struct work_struct *ws); extern void drbd_send_acks_wf(struct work_struct *ws); extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device); -extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector, +extern bool drbd_rs_should_slow_down(struct drbd_peer_device *peer_device, sector_t sector, bool throttle_if_app_is_waiting); extern int drbd_submit_peer_request(struct drbd_peer_request *peer_req); extern int drbd_free_peer_reqs(struct drbd_device *, struct list_head *); @@ -1542,22 +1545,22 @@ extern void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i extern void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i); extern void drbd_rs_complete_io(struct drbd_device *device, sector_t sector); extern int drbd_rs_begin_io(struct drbd_device *device, sector_t sector); -extern int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector); +extern int drbd_try_rs_begin_io(struct drbd_peer_device *peer_device, sector_t sector); extern void drbd_rs_cancel_all(struct drbd_device *device); extern int drbd_rs_del_all(struct drbd_device *device); -extern void drbd_rs_failed_io(struct drbd_device *device, +extern void drbd_rs_failed_io(struct drbd_peer_device *peer_device, sector_t sector, int size); -extern void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go); +extern void drbd_advance_rs_marks(struct drbd_peer_device *peer_device, unsigned long still_to_go); enum update_sync_bits_mode { RECORD_RS_FAILED, SET_OUT_OF_SYNC, SET_IN_SYNC }; -extern int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size, +extern int __drbd_change_sync(struct drbd_peer_device *peer_device, sector_t sector, int size, enum update_sync_bits_mode mode); -#define drbd_set_in_sync(device, sector, size) \ - __drbd_change_sync(device, sector, size, SET_IN_SYNC) -#define drbd_set_out_of_sync(device, sector, size) \ - __drbd_change_sync(device, sector, size, SET_OUT_OF_SYNC) -#define drbd_rs_failed_io(device, sector, size) \ - __drbd_change_sync(device, sector, size, RECORD_RS_FAILED) +#define drbd_set_in_sync(peer_device, sector, size) \ + __drbd_change_sync(peer_device, sector, size, SET_IN_SYNC) +#define drbd_set_out_of_sync(peer_device, sector, size) \ + __drbd_change_sync(peer_device, sector, size, SET_OUT_OF_SYNC) +#define drbd_rs_failed_io(peer_device, sector, size) \ + __drbd_change_sync(peer_device, sector, size, RECORD_RS_FAILED) extern void drbd_al_shrink(struct drbd_device *device); extern int drbd_al_initialize(struct drbd_device *, void *); @@ -1945,15 +1948,16 @@ static inline int __dec_ap_pending(struct drbd_device *device) * C_SYNC_SOURCE sends P_RS_DATA_REPLY (and expects P_WRITE_ACK with ID_SYNCER) * (or P_NEG_ACK with ID_SYNCER) */ -static inline void inc_rs_pending(struct drbd_device *device) +static inline void inc_rs_pending(struct drbd_peer_device *peer_device) { - atomic_inc(&device->rs_pending_cnt); + atomic_inc(&peer_device->device->rs_pending_cnt); } -#define dec_rs_pending(device) ((void)expect((device), __dec_rs_pending(device) >= 0)) -static inline int __dec_rs_pending(struct drbd_device *device) +#define dec_rs_pending(peer_device) \ + ((void)expect((peer_device), __dec_rs_pending(peer_device) >= 0)) +static inline int __dec_rs_pending(struct drbd_peer_device *peer_device) { - return atomic_dec_return(&device->rs_pending_cnt); + return atomic_dec_return(&peer_device->device->rs_pending_cnt); } /* counts how many answers we still need to send to the peer. diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 856c0e3a6630..e54404c632e7 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2044,11 +2044,11 @@ static int e_end_resync_block(struct drbd_work *w, int unused) D_ASSERT(device, drbd_interval_empty(&peer_req->i)); if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { - drbd_set_in_sync(device, sector, peer_req->i.size); + drbd_set_in_sync(peer_device, sector, peer_req->i.size); err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req); } else { /* Record failure to sync */ - drbd_rs_failed_io(device, sector, peer_req->i.size); + drbd_rs_failed_io(peer_device, sector, peer_req->i.size); err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req); } @@ -2067,7 +2067,7 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t secto if (!peer_req) goto fail; - dec_rs_pending(device); + dec_rs_pending(peer_device); inc_unacked(device); /* corresponding dec_unacked() in e_end_resync_block() @@ -2220,7 +2220,7 @@ static int e_end_block(struct drbd_work *w, int cancel) P_RS_WRITE_ACK : P_WRITE_ACK; err = drbd_send_ack(peer_device, pcmd, peer_req); if (pcmd == P_RS_WRITE_ACK) - drbd_set_in_sync(device, sector, peer_req->i.size); + drbd_set_in_sync(peer_device, sector, peer_req->i.size); } else { err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req); /* we expect it to be marked out of sync anyways... @@ -2691,7 +2691,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * if (device->state.pdsk < D_INCONSISTENT) { /* In case we have the only disk of the cluster, */ - drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size); + drbd_set_out_of_sync(peer_device, peer_req->i.sector, peer_req->i.size); peer_req->flags &= ~EE_MAY_SET_IN_SYNC; drbd_al_begin_io(device, &peer_req->i); peer_req->flags |= EE_CALL_AL_COMPLETE_IO; @@ -2730,9 +2730,10 @@ out_interrupted: * The current sync rate used here uses only the most recent two step marks, * to have a short time average so we can react faster. */ -bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector, +bool drbd_rs_should_slow_down(struct drbd_peer_device *peer_device, sector_t sector, bool throttle_if_app_is_waiting) { + struct drbd_device *device = peer_device->device; struct lc_element *tmp; bool throttle = drbd_rs_c_min_rate_throttle(device); @@ -2844,7 +2845,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet break; case P_OV_REPLY: verb = 0; - dec_rs_pending(device); + dec_rs_pending(peer_device); drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC); break; default: @@ -2915,7 +2916,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet /* track progress, we may need to throttle */ atomic_add(size >> 9, &device->rs_sect_in); peer_req->w.cb = w_e_end_ov_reply; - dec_rs_pending(device); + dec_rs_pending(peer_device); /* drbd_rs_begin_io done when we sent this request, * but accounting still needs to be done. */ goto submit_for_resync; @@ -2978,7 +2979,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet update_receiver_timing_details(connection, drbd_rs_should_slow_down); if (device->state.peer != R_PRIMARY - && drbd_rs_should_slow_down(device, sector, false)) + && drbd_rs_should_slow_down(peer_device, sector, false)) schedule_timeout_uninterruptible(HZ/10); update_receiver_timing_details(connection, drbd_rs_begin_io); if (drbd_rs_begin_io(device, sector)) @@ -4450,7 +4451,7 @@ static int receive_state(struct drbd_connection *connection, struct packet_info else if (os.conn >= C_SYNC_SOURCE && peer_state.conn == C_CONNECTED) { if (drbd_bm_total_weight(device) <= device->rs_failed) - drbd_resync_finished(device); + drbd_resync_finished(peer_device); return 0; } } @@ -4458,8 +4459,8 @@ static int receive_state(struct drbd_connection *connection, struct packet_info /* explicit verify finished notification, stop sector reached. */ if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE && peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) { - ov_out_of_sync_print(device); - drbd_resync_finished(device); + ov_out_of_sync_print(peer_device); + drbd_resync_finished(peer_device); return 0; } @@ -4937,7 +4938,7 @@ static int receive_out_of_sync(struct drbd_connection *connection, struct packet drbd_conn_str(device->state.conn)); } - drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize)); + drbd_set_out_of_sync(peer_device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize)); return 0; } @@ -4958,7 +4959,7 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac sector = be64_to_cpu(p->sector); size = be32_to_cpu(p->blksize); - dec_rs_pending(device); + dec_rs_pending(peer_device); if (get_ldev(device)) { struct drbd_peer_request *peer_req; @@ -5650,12 +5651,12 @@ static int got_IsInSync(struct drbd_connection *connection, struct packet_info * if (get_ldev(device)) { drbd_rs_complete_io(device, sector); - drbd_set_in_sync(device, sector, blksize); + drbd_set_in_sync(peer_device, sector, blksize); /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */ device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT); put_ldev(device); } - dec_rs_pending(device); + dec_rs_pending(peer_device); atomic_add(blksize >> 9, &device->rs_sect_in); return 0; @@ -5701,8 +5702,8 @@ static int got_BlockAck(struct drbd_connection *connection, struct packet_info * update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); if (p->block_id == ID_SYNCER) { - drbd_set_in_sync(device, sector, blksize); - dec_rs_pending(device); + drbd_set_in_sync(peer_device, sector, blksize); + dec_rs_pending(peer_device); return 0; } switch (pi->cmd) { @@ -5747,8 +5748,8 @@ static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); if (p->block_id == ID_SYNCER) { - dec_rs_pending(device); - drbd_rs_failed_io(device, sector, size); + dec_rs_pending(peer_device); + drbd_rs_failed_io(peer_device, sector, size); return 0; } @@ -5761,7 +5762,7 @@ static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi request is no longer in the collision hash. */ /* In Protocol B we might already have got a P_RECV_ACK but then get a P_NEG_ACK afterwards. */ - drbd_set_out_of_sync(device, sector, size); + drbd_set_out_of_sync(peer_device, sector, size); } return 0; } @@ -5806,13 +5807,13 @@ static int got_NegRSDReply(struct drbd_connection *connection, struct packet_inf update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); - dec_rs_pending(device); + dec_rs_pending(peer_device); if (get_ldev_if_state(device, D_FAILED)) { drbd_rs_complete_io(device, sector); switch (pi->cmd) { case P_NEG_RS_DREPLY: - drbd_rs_failed_io(device, sector, size); + drbd_rs_failed_io(peer_device, sector, size); break; case P_RS_CANCEL: break; @@ -5869,21 +5870,21 @@ static int got_OVResult(struct drbd_connection *connection, struct packet_info * update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC) - drbd_ov_out_of_sync_found(device, sector, size); + drbd_ov_out_of_sync_found(peer_device, sector, size); else - ov_out_of_sync_print(device); + ov_out_of_sync_print(peer_device); if (!get_ldev(device)) return 0; drbd_rs_complete_io(device, sector); - dec_rs_pending(device); + dec_rs_pending(peer_device); --device->ov_left; /* let's advance progress step marks only for every other megabyte */ if ((device->ov_left & 0x200) == 0x200) - drbd_advance_rs_marks(device, device->ov_left); + drbd_advance_rs_marks(peer_device, device->ov_left); if (device->ov_left == 0) { dw = kmalloc(sizeof(*dw), GFP_NOIO); @@ -5893,8 +5894,8 @@ static int got_OVResult(struct drbd_connection *connection, struct packet_info * drbd_queue_work(&peer_device->connection->sender_work, &dw->w); } else { drbd_err(device, "kmalloc(dw) failed."); - ov_out_of_sync_print(device); - drbd_resync_finished(device); + ov_out_of_sync_print(peer_device); + drbd_resync_finished(peer_device); } } put_ldev(device); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 528f29ebf369..380e6584a4ee 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -122,12 +122,13 @@ void drbd_req_destroy(struct kref *kref) * before it even was submitted or sent. * In that case we do not want to touch the bitmap at all. */ + struct drbd_peer_device *peer_device = first_peer_device(device); if ((s & (RQ_POSTPONED|RQ_LOCAL_MASK|RQ_NET_MASK)) != RQ_POSTPONED) { if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK)) - drbd_set_out_of_sync(device, req->i.sector, req->i.size); + drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size); if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS)) - drbd_set_in_sync(device, req->i.sector, req->i.size); + drbd_set_in_sync(peer_device, req->i.sector, req->i.size); } /* one might be tempted to move the drbd_al_complete_io @@ -620,7 +621,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case READ_COMPLETED_WITH_ERROR: - drbd_set_out_of_sync(device, req->i.sector, req->i.size); + drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size); drbd_report_io_error(device, req); __drbd_chk_io_error(device, DRBD_READ_ERROR); fallthrough; @@ -1131,7 +1132,7 @@ static int drbd_process_write_request(struct drbd_request *req) if (remote) { _req_mod(req, TO_BE_SENT, peer_device); _req_mod(req, QUEUE_FOR_NET_WRITE, peer_device); - } else if (drbd_set_out_of_sync(device, req->i.sector, req->i.size)) + } else if (drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size)) _req_mod(req, QUEUE_FOR_SEND_OOS, peer_device); return remote; diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index c92dc6093b0a..563e67f1ead9 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1222,9 +1222,11 @@ void drbd_resume_al(struct drbd_device *device) } /* helper for _drbd_set_state */ -static void set_ov_position(struct drbd_device *device, enum drbd_conns cs) +static void set_ov_position(struct drbd_peer_device *peer_device, enum drbd_conns cs) { - if (first_peer_device(device)->connection->agreed_pro_version < 90) + struct drbd_device *device = peer_device->device; + + if (peer_device->connection->agreed_pro_version < 90) device->ov_start_sector = 0; device->rs_total = drbd_bm_bits(device); device->ov_position = 0; @@ -1387,7 +1389,7 @@ _drbd_set_state(struct drbd_device *device, union drbd_state ns, unsigned long now = jiffies; int i; - set_ov_position(device, ns.conn); + set_ov_position(peer_device, ns.conn); device->rs_start = now; device->rs_last_sect_ev = 0; device->ov_last_oos_size = 0; @@ -1398,7 +1400,7 @@ _drbd_set_state(struct drbd_device *device, union drbd_state ns, device->rs_mark_time[i] = now; } - drbd_rs_controller_reset(device); + drbd_rs_controller_reset(peer_device); if (ns.conn == C_VERIFY_S) { drbd_info(device, "Starting Online Verify from sector %llu\n", diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 6455edca7aa9..4352a50fbb3f 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -28,8 +28,8 @@ #include "drbd_protocol.h" #include "drbd_req.h" -static int make_ov_request(struct drbd_device *, int); -static int make_resync_request(struct drbd_device *, int); +static int make_ov_request(struct drbd_peer_device *, int); +static int make_resync_request(struct drbd_peer_device *, int); /* endio handlers: * drbd_md_endio (defined here) @@ -124,7 +124,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l * In case of a write error, send the neg ack anyways. */ if (!__test_and_set_bit(__EE_SEND_WRITE_ACK, &peer_req->flags)) inc_unacked(device); - drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size); + drbd_set_out_of_sync(peer_device, peer_req->i.sector, peer_req->i.size); } spin_lock_irqsave(&device->resource->req_lock, flags); @@ -363,7 +363,7 @@ static int w_e_send_csum(struct drbd_work *w, int cancel) * drbd_alloc_pages due to pp_in_use > max_buffers. */ drbd_free_peer_req(device, peer_req); peer_req = NULL; - inc_rs_pending(device); + inc_rs_pending(peer_device); err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_CSUM_RS_REQUEST); @@ -430,10 +430,10 @@ int w_resync_timer(struct drbd_work *w, int cancel) switch (device->state.conn) { case C_VERIFY_S: - make_ov_request(device, cancel); + make_ov_request(first_peer_device(device), cancel); break; case C_SYNC_TARGET: - make_resync_request(device, cancel); + make_resync_request(first_peer_device(device), cancel); break; } @@ -493,8 +493,9 @@ struct fifo_buffer *fifo_alloc(unsigned int fifo_size) return fb; } -static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in) +static int drbd_rs_controller(struct drbd_peer_device *peer_device, unsigned int sect_in) { + struct drbd_device *device = peer_device->device; struct disk_conf *dc; unsigned int want; /* The number of sectors we want in-flight */ int req_sect; /* Number of sectors to request in this turn */ @@ -545,8 +546,9 @@ static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in) return req_sect; } -static int drbd_rs_number_requests(struct drbd_device *device) +static int drbd_rs_number_requests(struct drbd_peer_device *peer_device) { + struct drbd_device *device = peer_device->device; unsigned int sect_in; /* Number of sectors that came in since the last turn */ int number, mxb; @@ -556,7 +558,7 @@ static int drbd_rs_number_requests(struct drbd_device *device) rcu_read_lock(); mxb = drbd_get_max_buffers(device) / 2; if (rcu_dereference(device->rs_plan_s)->size) { - number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9); + number = drbd_rs_controller(peer_device, sect_in) >> (BM_BLOCK_SHIFT - 9); device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; } else { device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate; @@ -580,9 +582,9 @@ static int drbd_rs_number_requests(struct drbd_device *device) return number; } -static int make_resync_request(struct drbd_device *const device, int cancel) +static int make_resync_request(struct drbd_peer_device *const peer_device, int cancel) { - struct drbd_peer_device *const peer_device = first_peer_device(device); + struct drbd_device *const device = peer_device->device; struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL; unsigned long bit; sector_t sector; @@ -598,7 +600,7 @@ static int make_resync_request(struct drbd_device *const device, int cancel) if (device->rs_total == 0) { /* empty resync? */ - drbd_resync_finished(device); + drbd_resync_finished(peer_device); return 0; } @@ -618,7 +620,7 @@ static int make_resync_request(struct drbd_device *const device, int cancel) } max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9; - number = drbd_rs_number_requests(device); + number = drbd_rs_number_requests(peer_device); if (number <= 0) goto requeue; @@ -653,7 +655,7 @@ next_sector: sector = BM_BIT_TO_SECT(bit); - if (drbd_try_rs_begin_io(device, sector)) { + if (drbd_try_rs_begin_io(peer_device, sector)) { device->bm_resync_fo = bit; goto requeue; } @@ -729,13 +731,13 @@ next_sector: } else { int err; - inc_rs_pending(device); + inc_rs_pending(peer_device); err = drbd_send_drequest(peer_device, size == discard_granularity ? P_RS_THIN_REQ : P_RS_DATA_REQUEST, sector, size, ID_SYNCER); if (err) { drbd_err(device, "drbd_send_drequest() failed, aborting...\n"); - dec_rs_pending(device); + dec_rs_pending(peer_device); put_ldev(device); return err; } @@ -760,8 +762,9 @@ next_sector: return 0; } -static int make_ov_request(struct drbd_device *device, int cancel) +static int make_ov_request(struct drbd_peer_device *peer_device, int cancel) { + struct drbd_device *device = peer_device->device; int number, i, size; sector_t sector; const sector_t capacity = get_capacity(device->vdisk); @@ -770,7 +773,7 @@ static int make_ov_request(struct drbd_device *device, int cancel) if (unlikely(cancel)) return 1; - number = drbd_rs_number_requests(device); + number = drbd_rs_number_requests(peer_device); sector = device->ov_position; for (i = 0; i < number; i++) { @@ -788,7 +791,7 @@ static int make_ov_request(struct drbd_device *device, int cancel) size = BM_BLOCK_SIZE; - if (drbd_try_rs_begin_io(device, sector)) { + if (drbd_try_rs_begin_io(peer_device, sector)) { device->ov_position = sector; goto requeue; } @@ -796,9 +799,9 @@ static int make_ov_request(struct drbd_device *device, int cancel) if (sector + (size>>9) > capacity) size = (capacity-sector)<<9; - inc_rs_pending(device); + inc_rs_pending(peer_device); if (drbd_send_ov_request(first_peer_device(device), sector, size)) { - dec_rs_pending(device); + dec_rs_pending(peer_device); return 0; } sector += BM_SECT_PER_BIT; @@ -818,8 +821,8 @@ int w_ov_finished(struct drbd_work *w, int cancel) container_of(w, struct drbd_device_work, w); struct drbd_device *device = dw->device; kfree(dw); - ov_out_of_sync_print(device); - drbd_resync_finished(device); + ov_out_of_sync_print(first_peer_device(device)); + drbd_resync_finished(first_peer_device(device)); return 0; } @@ -831,7 +834,7 @@ static int w_resync_finished(struct drbd_work *w, int cancel) struct drbd_device *device = dw->device; kfree(dw); - drbd_resync_finished(device); + drbd_resync_finished(first_peer_device(device)); return 0; } @@ -846,9 +849,10 @@ static void ping_peer(struct drbd_device *device) test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED); } -int drbd_resync_finished(struct drbd_device *device) +int drbd_resync_finished(struct drbd_peer_device *peer_device) { - struct drbd_connection *connection = first_peer_device(device)->connection; + struct drbd_device *device = peer_device->device; + struct drbd_connection *connection = peer_device->connection; unsigned long db, dt, dbdt; unsigned long n_oos; union drbd_state os, ns; @@ -1129,7 +1133,7 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel) err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req); } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { if (likely(device->state.pdsk >= D_INCONSISTENT)) { - inc_rs_pending(device); + inc_rs_pending(peer_device); if (peer_req->flags & EE_RS_THIN_REQ && all_zero(peer_req)) err = drbd_send_rs_deallocated(peer_device, peer_req); else @@ -1148,7 +1152,7 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel) err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req); /* update resync data with failure */ - drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size); + drbd_rs_failed_io(peer_device, peer_req->i.sector, peer_req->i.size); } dec_unacked(device); @@ -1199,12 +1203,12 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) } if (eq) { - drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size); + drbd_set_in_sync(peer_device, peer_req->i.sector, peer_req->i.size); /* rs_same_csums unit is BM_BLOCK_SIZE */ device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT; err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req); } else { - inc_rs_pending(device); + inc_rs_pending(peer_device); peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */ peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */ kfree(di); @@ -1257,10 +1261,10 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel) * drbd_alloc_pages due to pp_in_use > max_buffers. */ drbd_free_peer_req(device, peer_req); peer_req = NULL; - inc_rs_pending(device); + inc_rs_pending(peer_device); err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY); if (err) - dec_rs_pending(device); + dec_rs_pending(peer_device); kfree(digest); out: @@ -1270,15 +1274,16 @@ out: return err; } -void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size) +void drbd_ov_out_of_sync_found(struct drbd_peer_device *peer_device, sector_t sector, int size) { + struct drbd_device *device = peer_device->device; if (device->ov_last_oos_start + device->ov_last_oos_size == sector) { device->ov_last_oos_size += size>>9; } else { device->ov_last_oos_start = sector; device->ov_last_oos_size = size>>9; } - drbd_set_out_of_sync(device, sector, size); + drbd_set_out_of_sync(peer_device, sector, size); } int w_e_end_ov_reply(struct drbd_work *w, int cancel) @@ -1328,9 +1333,9 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) * drbd_alloc_pages due to pp_in_use > max_buffers. */ drbd_free_peer_req(device, peer_req); if (!eq) - drbd_ov_out_of_sync_found(device, sector, size); + drbd_ov_out_of_sync_found(peer_device, sector, size); else - ov_out_of_sync_print(device); + ov_out_of_sync_print(peer_device); err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, eq ? ID_IN_SYNC : ID_OUT_OF_SYNC); @@ -1341,14 +1346,14 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) /* let's advance progress step marks only for every other megabyte */ if ((device->ov_left & 0x200) == 0x200) - drbd_advance_rs_marks(device, device->ov_left); + drbd_advance_rs_marks(peer_device, device->ov_left); stop_sector_reached = verify_can_do_stop_sector(device) && (sector + (size>>9)) >= device->ov_stop_sector; if (device->ov_left == 0 || stop_sector_reached) { - ov_out_of_sync_print(device); - drbd_resync_finished(device); + ov_out_of_sync_print(peer_device); + drbd_resync_finished(peer_device); } return err; @@ -1668,8 +1673,9 @@ void drbd_resync_after_changed(struct drbd_device *device) } while (changed); } -void drbd_rs_controller_reset(struct drbd_device *device) +void drbd_rs_controller_reset(struct drbd_peer_device *peer_device) { + struct drbd_device *device = peer_device->device; struct gendisk *disk = device->ldev->backing_bdev->bd_disk; struct fifo_buffer *plan; @@ -1891,10 +1897,10 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) rcu_read_unlock(); schedule_timeout_interruptible(timeo); } - drbd_resync_finished(device); + drbd_resync_finished(peer_device); } - drbd_rs_controller_reset(device); + drbd_rs_controller_reset(peer_device); /* ns.conn may already be != device->state.conn, * we may have been paused in between, or become paused until * the timer triggers. @@ -1909,8 +1915,9 @@ out: mutex_unlock(device->state_mutex); } -static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done) +static void update_on_disk_bitmap(struct drbd_peer_device *peer_device, bool resync_done) { + struct drbd_device *device = peer_device->device; struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, }; device->rs_last_bcast = jiffies; @@ -1919,7 +1926,7 @@ static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done) drbd_bm_write_lazy(device, 0); if (resync_done && is_sync_state(device->state.conn)) - drbd_resync_finished(device); + drbd_resync_finished(peer_device); drbd_bcast_event(device, &sib); /* update timestamp, in case it took a while to write out stuff */ @@ -2018,7 +2025,7 @@ static void do_device_work(struct drbd_device *device, const unsigned long todo) do_md_sync(device); if (test_bit(RS_DONE, &todo) || test_bit(RS_PROGRESS, &todo)) - update_on_disk_bitmap(device, test_bit(RS_DONE, &todo)); + update_on_disk_bitmap(first_peer_device(device), test_bit(RS_DONE, &todo)); if (test_bit(GO_DISKLESS, &todo)) go_diskless(device); if (test_bit(DESTROY_DISK, &todo)) -- cgit v1.2.3 From 903f8aeea9fd1b97fba4ab805ddd639f57f117f8 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 30 Mar 2023 19:36:20 +0800 Subject: block: ublk_drv: add common exit handling Simplify exit handling a bit, and prepare for supporting fused command. Reviewed-by: Ziyang Zhang Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index d1d1c8d606c8..7b3d551938f4 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -655,14 +655,15 @@ static void ublk_complete_rq(struct request *req) struct ublk_queue *ubq = req->mq_hctx->driver_data; struct ublk_io *io = &ubq->ios[req->tag]; unsigned int unmapped_bytes; + blk_status_t res = BLK_STS_OK; /* failed read IO if nothing is read */ if (!io->res && req_op(req) == REQ_OP_READ) io->res = -EIO; if (io->res < 0) { - blk_mq_end_request(req, errno_to_blk_status(io->res)); - return; + res = errno_to_blk_status(io->res); + goto exit; } /* @@ -671,10 +672,8 @@ static void ublk_complete_rq(struct request *req) * * Both the two needn't unmap. */ - if (req_op(req) != REQ_OP_READ && req_op(req) != REQ_OP_WRITE) { - blk_mq_end_request(req, BLK_STS_OK); - return; - } + if (req_op(req) != REQ_OP_READ && req_op(req) != REQ_OP_WRITE) + goto exit; /* for READ request, writing data in iod->addr to rq buffers */ unmapped_bytes = ublk_unmap_io(ubq, req, io); @@ -691,6 +690,10 @@ static void ublk_complete_rq(struct request *req) blk_mq_requeue_request(req, true); else __blk_mq_end_request(req, BLK_STS_OK); + + return; +exit: + blk_mq_end_request(req, res); } /* -- cgit v1.2.3 From 23ef8220f287abe5bf741ddfc278e7359742d3b1 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 30 Mar 2023 19:36:21 +0800 Subject: block: ublk_drv: don't consider flush request in map/unmap io There isn't data in request of REQ_OP_FLUSH always, so don't consider it in both ublk_map_io() and ublk_unmap_io(). Reviewed-by: Ziyang Zhang Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 7b3d551938f4..67a6b0581b39 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -529,15 +529,13 @@ static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req, struct ublk_io *io) { const unsigned int rq_bytes = blk_rq_bytes(req); + /* * no zero copy, we delay copy WRITE request data into ublksrv * context and the big benefit is that pinning pages in current * context is pretty fast, see ublk_pin_user_pages */ - if (req_op(req) != REQ_OP_WRITE && req_op(req) != REQ_OP_FLUSH) - return rq_bytes; - - if (ublk_rq_has_data(req)) { + if (ublk_rq_has_data(req) && req_op(req) == REQ_OP_WRITE) { struct ublk_map_data data = { .ubq = ubq, .rq = req, @@ -772,9 +770,7 @@ static inline void __ublk_rq_task_work(struct request *req) return; } - if (ublk_need_get_data(ubq) && - (req_op(req) == REQ_OP_WRITE || - req_op(req) == REQ_OP_FLUSH)) { + if (ublk_need_get_data(ubq) && (req_op(req) == REQ_OP_WRITE)) { /* * We have not handled UBLK_IO_NEED_GET_DATA command yet, * so immepdately pass UBLK_IO_RES_NEED_GET_DATA to ublksrv -- cgit v1.2.3 From 2f3af723447c35c16f3c6a1b4b317c61dc41d6c3 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 30 Mar 2023 19:36:22 +0800 Subject: block: ublk_drv: add two helpers to clean up map/unmap request Add two helpers for checking if map/unmap is needed, since we may have passthrough request which needs map or unmap in future, such as for supporting report zones. Meantime don't mark ublk_copy_user_pages as inline since this function is a bit fat now. Reviewed-by: Ziyang Zhang Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 67a6b0581b39..439bd5773715 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -488,8 +488,7 @@ static inline unsigned ublk_copy_io_pages(struct ublk_io_iter *data, return done; } -static inline int ublk_copy_user_pages(struct ublk_map_data *data, - bool to_vm) +static int ublk_copy_user_pages(struct ublk_map_data *data, bool to_vm) { const unsigned int gup_flags = to_vm ? FOLL_WRITE : 0; const unsigned long start_vm = data->io->addr; @@ -525,6 +524,16 @@ static inline int ublk_copy_user_pages(struct ublk_map_data *data, return done; } +static inline bool ublk_need_map_req(const struct request *req) +{ + return ublk_rq_has_data(req) && req_op(req) == REQ_OP_WRITE; +} + +static inline bool ublk_need_unmap_req(const struct request *req) +{ + return ublk_rq_has_data(req) && req_op(req) == REQ_OP_READ; +} + static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req, struct ublk_io *io) { @@ -535,7 +544,7 @@ static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req, * context and the big benefit is that pinning pages in current * context is pretty fast, see ublk_pin_user_pages */ - if (ublk_rq_has_data(req) && req_op(req) == REQ_OP_WRITE) { + if (ublk_need_map_req(req)) { struct ublk_map_data data = { .ubq = ubq, .rq = req, @@ -556,7 +565,7 @@ static int ublk_unmap_io(const struct ublk_queue *ubq, { const unsigned int rq_bytes = blk_rq_bytes(req); - if (req_op(req) == REQ_OP_READ && ublk_rq_has_data(req)) { + if (ublk_need_unmap_req(req)) { struct ublk_map_data data = { .ubq = ubq, .rq = req, @@ -770,7 +779,7 @@ static inline void __ublk_rq_task_work(struct request *req) return; } - if (ublk_need_get_data(ubq) && (req_op(req) == REQ_OP_WRITE)) { + if (ublk_need_get_data(ubq) && ublk_need_map_req(req)) { /* * We have not handled UBLK_IO_NEED_GET_DATA command yet, * so immepdately pass UBLK_IO_RES_NEED_GET_DATA to ublksrv -- cgit v1.2.3 From 96cf2f5404c8bc979628a2b495852d735a56c5b5 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 30 Mar 2023 19:36:23 +0800 Subject: block: ublk_drv: clean up several helpers Convert the following pattern in several helpers if (Z) return true return false into: return Z; Reviewed-by: Ziyang Zhang Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 439bd5773715..a40edc437c9f 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -298,9 +298,7 @@ static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq) static inline bool ublk_need_get_data(const struct ublk_queue *ubq) { - if (ubq->flags & UBLK_F_NEED_GET_DATA) - return true; - return false; + return ubq->flags & UBLK_F_NEED_GET_DATA; } static struct ublk_device *ublk_get_device(struct ublk_device *ub) @@ -349,25 +347,19 @@ static inline int ublk_queue_cmd_buf_size(struct ublk_device *ub, int q_id) static inline bool ublk_queue_can_use_recovery_reissue( struct ublk_queue *ubq) { - if ((ubq->flags & UBLK_F_USER_RECOVERY) && - (ubq->flags & UBLK_F_USER_RECOVERY_REISSUE)) - return true; - return false; + return (ubq->flags & UBLK_F_USER_RECOVERY) && + (ubq->flags & UBLK_F_USER_RECOVERY_REISSUE); } static inline bool ublk_queue_can_use_recovery( struct ublk_queue *ubq) { - if (ubq->flags & UBLK_F_USER_RECOVERY) - return true; - return false; + return ubq->flags & UBLK_F_USER_RECOVERY; } static inline bool ublk_can_use_recovery(struct ublk_device *ub) { - if (ub->dev_info.flags & UBLK_F_USER_RECOVERY) - return true; - return false; + return ub->dev_info.flags & UBLK_F_USER_RECOVERY; } static void ublk_free_disk(struct gendisk *disk) -- cgit v1.2.3 From ae9f5ccea4c268a96763e51239b32d6b5172c18c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 30 Mar 2023 19:36:24 +0800 Subject: block: ublk_drv: cleanup 'struct ublk_map_data' 'struct ublk_map_data' is passed to ublk_copy_user_pages() for copying data between userspace buffer and request pages. Here what matters is userspace buffer address/len and 'struct request', so replace ->io field with user buffer address, and rename max_bytes as len. Meantime remove 'ubq' field from ublk_map_data, since it isn't used any more. Then code becomes more readable. Reviewed-by: Ziyang Zhang Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index a40edc437c9f..1223fcbfc6c9 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -420,10 +420,9 @@ static const struct block_device_operations ub_fops = { #define UBLK_MAX_PIN_PAGES 32 struct ublk_map_data { - const struct ublk_queue *ubq; const struct request *rq; - const struct ublk_io *io; - unsigned max_bytes; + unsigned long ubuf; + unsigned int len; }; struct ublk_io_iter { @@ -483,14 +482,14 @@ static inline unsigned ublk_copy_io_pages(struct ublk_io_iter *data, static int ublk_copy_user_pages(struct ublk_map_data *data, bool to_vm) { const unsigned int gup_flags = to_vm ? FOLL_WRITE : 0; - const unsigned long start_vm = data->io->addr; + const unsigned long start_vm = data->ubuf; unsigned int done = 0; struct ublk_io_iter iter = { .pg_off = start_vm & (PAGE_SIZE - 1), .bio = data->rq->bio, .iter = data->rq->bio->bi_iter, }; - const unsigned int nr_pages = round_up(data->max_bytes + + const unsigned int nr_pages = round_up(data->len + (start_vm & (PAGE_SIZE - 1)), PAGE_SIZE) >> PAGE_SHIFT; while (done < nr_pages) { @@ -503,13 +502,13 @@ static int ublk_copy_user_pages(struct ublk_map_data *data, bool to_vm) iter.pages); if (iter.nr_pages <= 0) return done == 0 ? iter.nr_pages : done; - len = ublk_copy_io_pages(&iter, data->max_bytes, to_vm); + len = ublk_copy_io_pages(&iter, data->len, to_vm); for (i = 0; i < iter.nr_pages; i++) { if (to_vm) set_page_dirty(iter.pages[i]); put_page(iter.pages[i]); } - data->max_bytes -= len; + data->len -= len; done += iter.nr_pages; } @@ -538,15 +537,14 @@ static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req, */ if (ublk_need_map_req(req)) { struct ublk_map_data data = { - .ubq = ubq, .rq = req, - .io = io, - .max_bytes = rq_bytes, + .ubuf = io->addr, + .len = rq_bytes, }; ublk_copy_user_pages(&data, true); - return rq_bytes - data.max_bytes; + return rq_bytes - data.len; } return rq_bytes; } @@ -559,17 +557,16 @@ static int ublk_unmap_io(const struct ublk_queue *ubq, if (ublk_need_unmap_req(req)) { struct ublk_map_data data = { - .ubq = ubq, .rq = req, - .io = io, - .max_bytes = io->res, + .ubuf = io->addr, + .len = io->res, }; WARN_ON_ONCE(io->res > rq_bytes); ublk_copy_user_pages(&data, false); - return io->res - data.max_bytes; + return io->res - data.len; } return rq_bytes; } -- cgit v1.2.3 From bb4c19e030f45c5416f1eb4daa94fbaf7165e9ea Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 27 Mar 2023 23:37:33 +0900 Subject: block: null_blk: make fault-injection dynamically configurable per device The null_blk driver has multiple driver-specific fault injection mechanisms. Each fault injection configuration can only be specified by a module parameter and cannot be reconfigured without reloading the driver. Also, each configuration is common to all devices and is initialized every time a new device is added. This change adds the following subdirectories for each null_blk device. /sys/kernel/config/nullb//timeout_inject /sys/kernel/config/nullb//requeue_inject /sys/kernel/config/nullb//init_hctx_fault_inject Each fault injection attribute can be dynamically set per device by a corresponding file in these directories. Signed-off-by: Akinobu Mita Link: https://lore.kernel.org/r/20230327143733.14599-3-akinobu.mita@gmail.com Signed-off-by: Jens Axboe --- Documentation/fault-injection/fault-injection.rst | 8 ++ drivers/block/null_blk/Kconfig | 2 +- drivers/block/null_blk/main.c | 93 ++++++++++++++++++----- drivers/block/null_blk/null_blk.h | 7 +- 4 files changed, 87 insertions(+), 23 deletions(-) (limited to 'drivers/block') diff --git a/Documentation/fault-injection/fault-injection.rst b/Documentation/fault-injection/fault-injection.rst index 08e420e10973..b64809514b0f 100644 --- a/Documentation/fault-injection/fault-injection.rst +++ b/Documentation/fault-injection/fault-injection.rst @@ -52,6 +52,14 @@ Available fault injection capabilities status code is NVME_SC_INVALID_OPCODE with no retry. The status code and retry flag can be set via the debugfs. +- Null test block driver fault injection + + inject IO timeouts by setting config items under + /sys/kernel/config/nullb//timeout_inject, + inject requeue requests by setting config items under + /sys/kernel/config/nullb//requeue_inject, and + inject init_hctx() errors by setting config items under + /sys/kernel/config/nullb//init_hctx_fault_inject. Configure fault-injection capabilities behavior ----------------------------------------------- diff --git a/drivers/block/null_blk/Kconfig b/drivers/block/null_blk/Kconfig index 6bf1f8ca20a2..ff23bb9346d0 100644 --- a/drivers/block/null_blk/Kconfig +++ b/drivers/block/null_blk/Kconfig @@ -9,4 +9,4 @@ config BLK_DEV_NULL_BLK config BLK_DEV_NULL_BLK_FAULT_INJECTION bool "Support fault injection for Null test block driver" - depends on BLK_DEV_NULL_BLK && FAULT_INJECTION + depends on BLK_DEV_NULL_BLK && FAULT_INJECTION_CONFIGFS diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 1d914a82f863..9a7e5699369e 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -250,7 +250,7 @@ static void null_free_device_storage(struct nullb_device *dev, bool is_cache); static inline struct nullb_device *to_nullb_device(struct config_item *item) { - return item ? container_of(item, struct nullb_device, item) : NULL; + return item ? container_of(to_config_group(item), struct nullb_device, group) : NULL; } static inline ssize_t nullb_device_uint_attr_show(unsigned int val, char *page) @@ -593,8 +593,29 @@ static const struct config_item_type nullb_device_type = { .ct_owner = THIS_MODULE, }; +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION + +static void nullb_add_fault_config(struct nullb_device *dev) +{ + fault_config_init(&dev->timeout_config, "timeout_inject"); + fault_config_init(&dev->requeue_config, "requeue_inject"); + fault_config_init(&dev->init_hctx_fault_config, "init_hctx_fault_inject"); + + configfs_add_default_group(&dev->timeout_config.group, &dev->group); + configfs_add_default_group(&dev->requeue_config.group, &dev->group); + configfs_add_default_group(&dev->init_hctx_fault_config.group, &dev->group); +} + +#else + +static void nullb_add_fault_config(struct nullb_device *dev) +{ +} + +#endif + static struct -config_item *nullb_group_make_item(struct config_group *group, const char *name) +config_group *nullb_group_make_group(struct config_group *group, const char *name) { struct nullb_device *dev; @@ -605,9 +626,10 @@ config_item *nullb_group_make_item(struct config_group *group, const char *name) if (!dev) return ERR_PTR(-ENOMEM); - config_item_init_type_name(&dev->item, name, &nullb_device_type); + config_group_init_type_name(&dev->group, name, &nullb_device_type); + nullb_add_fault_config(dev); - return &dev->item; + return &dev->group; } static void @@ -645,7 +667,7 @@ static struct configfs_attribute *nullb_group_attrs[] = { }; static struct configfs_group_operations nullb_group_ops = { - .make_item = nullb_group_make_item, + .make_group = nullb_group_make_group, .drop_item = nullb_group_drop_item, }; @@ -676,6 +698,13 @@ static struct nullb_device *null_alloc_dev(void) dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return NULL; + +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION + dev->timeout_config.attr = null_timeout_attr; + dev->requeue_config.attr = null_requeue_attr; + dev->init_hctx_fault_config.attr = null_init_hctx_attr; +#endif + INIT_RADIX_TREE(&dev->data, GFP_ATOMIC); INIT_RADIX_TREE(&dev->cache, GFP_ATOMIC); if (badblocks_init(&dev->badblocks, 0)) { @@ -1515,24 +1544,48 @@ static void null_submit_bio(struct bio *bio) null_handle_cmd(alloc_cmd(nq, bio), sector, nr_sectors, bio_op(bio)); } +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION + +static bool should_timeout_request(struct request *rq) +{ + struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq); + struct nullb_device *dev = cmd->nq->dev; + + return should_fail(&dev->timeout_config.attr, 1); +} + +static bool should_requeue_request(struct request *rq) +{ + struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq); + struct nullb_device *dev = cmd->nq->dev; + + return should_fail(&dev->requeue_config.attr, 1); +} + +static bool should_init_hctx_fail(struct nullb_device *dev) +{ + return should_fail(&dev->init_hctx_fault_config.attr, 1); +} + +#else + static bool should_timeout_request(struct request *rq) { -#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION - if (g_timeout_str[0]) - return should_fail(&null_timeout_attr, 1); -#endif return false; } static bool should_requeue_request(struct request *rq) { -#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION - if (g_requeue_str[0]) - return should_fail(&null_requeue_attr, 1); -#endif return false; } +static bool should_init_hctx_fail(struct nullb_device *dev) +{ + return false; +} + +#endif + static void null_map_queues(struct blk_mq_tag_set *set) { struct nullb *nullb = set->driver_data; @@ -1729,10 +1782,8 @@ static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, struct nullb *nullb = hctx->queue->queuedata; struct nullb_queue *nq; -#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION - if (g_init_hctx_str[0] && should_fail(&null_init_hctx_attr, 1)) + if (should_init_hctx_fail(nullb->dev)) return -EFAULT; -#endif nq = &nullb->queues[hctx_idx]; hctx->driver_data = nq; @@ -2052,9 +2103,6 @@ static int null_add_dev(struct nullb_device *dev) if (rv) goto out_cleanup_queues; - if (!null_setup_fault()) - goto out_cleanup_tags; - nullb->tag_set->timeout = 5 * HZ; nullb->disk = blk_mq_alloc_disk(nullb->tag_set, nullb); if (IS_ERR(nullb->disk)) { @@ -2116,10 +2164,10 @@ static int null_add_dev(struct nullb_device *dev) null_config_discard(nullb); - if (config_item_name(&dev->item)) { + if (config_item_name(&dev->group.cg_item)) { /* Use configfs dir name as the device name */ snprintf(nullb->disk_name, sizeof(nullb->disk_name), - "%s", config_item_name(&dev->item)); + "%s", config_item_name(&dev->group.cg_item)); } else { sprintf(nullb->disk_name, "nullb%d", nullb->index); } @@ -2219,6 +2267,9 @@ static int __init null_init(void) g_home_node = NUMA_NO_NODE; } + if (!null_setup_fault()) + return -EINVAL; + if (g_queue_mode == NULL_Q_RQ) { pr_err("legacy IO path is no longer available\n"); return -EINVAL; diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h index eb5972c50be8..929f659dd255 100644 --- a/drivers/block/null_blk/null_blk.h +++ b/drivers/block/null_blk/null_blk.h @@ -69,7 +69,12 @@ enum { struct nullb_device { struct nullb *nullb; - struct config_item item; + struct config_group group; +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION + struct fault_config timeout_config; + struct fault_config requeue_config; + struct fault_config init_hctx_fault_config; +#endif struct radix_tree_root data; /* data stored in the disk */ struct radix_tree_root cache; /* disk cache data */ unsigned long flags; /* device flags */ -- cgit v1.2.3 From 2d786e66c9662d84cbeab981ce3a371d2fb5a4bb Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 18 Apr 2023 21:18:10 +0800 Subject: block: ublk: switch to ioctl command encoding All ublk commands(control, IO) should have taken ioctl command encoding from the beginning, because ioctl command encoding defines each code uniquely, so driver can figure out wrong command sent from userspace easily; 2) it might help security subsystem for audit uring cmd[1]. Unfortunately we didn't do that way, and it could be one lesson for ublk driver. So switch to ioctl command encoding now, we still support commands encoded in old way, but they become legacy definition. Any new command should take ioctl encoding. See ublksrv code for switching to ioctl command encoding in [2]. [1] https://lore.kernel.org/io-uring/CAHC9VhSVzujW9LOj5Km80AjU0EfAuukoLrxO6BEfnXeK_s6bAg@mail.gmail.com/ [2] https://github.com/ming1/ubdsrv/commits/ioctl_cmd_encoding Cc: Christoph Hellwig Cc: Ken Kurematsu Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230418131810.855959-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/Kconfig | 17 ++++++++++++++++ drivers/block/ublk_drv.c | 47 +++++++++++++++++++++++++++++++------------ include/uapi/linux/ublk_cmd.h | 43 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+), 13 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index f79f20430ef7..5b9d4aaebb81 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -385,6 +385,23 @@ config BLK_DEV_UBLK can handle batch more effectively, but task_work_add() isn't exported for module, so ublk has to be built to kernel. +config BLKDEV_UBLK_LEGACY_OPCODES + bool "Support legacy command opcode" + depends on BLK_DEV_UBLK + default y + help + ublk driver started to take plain command encoding, which turns out + one bad way. The traditional ioctl command opcode encodes more + info and basically defines each code uniquely, so opcode conflict + is avoided, and driver can handle wrong command easily, meantime it + may help security subsystem to audit io_uring command. + + Say Y if your application still uses legacy command opcode. + + Say N if you don't want to support legacy command opcode. It is + suggested to enable N if your application(ublk server) switches to + ioctl command encoding. + source "drivers/block/rnbd/Kconfig" endif # BLK_DEV diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 1223fcbfc6c9..5da5876a4443 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -53,7 +53,8 @@ | UBLK_F_NEED_GET_DATA \ | UBLK_F_USER_RECOVERY \ | UBLK_F_USER_RECOVERY_REISSUE \ - | UBLK_F_UNPRIVILEGED_DEV) + | UBLK_F_UNPRIVILEGED_DEV \ + | UBLK_F_CMD_IOCTL_ENCODE) /* All UBLK_PARAM_TYPE_* should be included here */ #define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | \ @@ -1253,6 +1254,19 @@ static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id, ublk_queue_cmd(ubq, req); } +static inline int ublk_check_cmd_op(u32 cmd_op) +{ + u32 ioc_type = _IOC_TYPE(cmd_op); + + if (IS_ENABLED(CONFIG_BLKDEV_UBLK_LEGACY_OPCODES) && ioc_type != 'u') + return -EOPNOTSUPP; + + if (ioc_type != 'u' && ioc_type != 0) + return -EOPNOTSUPP; + + return 0; +} + static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) { struct ublksrv_io_cmd *ub_cmd = (struct ublksrv_io_cmd *)cmd->cmd; @@ -1294,10 +1308,14 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) * iff the driver have set the UBLK_IO_FLAG_NEED_GET_DATA. */ if ((!!(io->flags & UBLK_IO_FLAG_NEED_GET_DATA)) - ^ (cmd_op == UBLK_IO_NEED_GET_DATA)) + ^ (_IOC_NR(cmd_op) == UBLK_IO_NEED_GET_DATA)) + goto out; + + ret = ublk_check_cmd_op(cmd_op); + if (ret) goto out; - switch (cmd_op) { + switch (_IOC_NR(cmd_op)) { case UBLK_IO_FETCH_REQ: /* UBLK_IO_FETCH_REQ is only allowed before queue is setup */ if (ublk_queue_ready(ubq)) { @@ -1743,6 +1761,8 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) if (!IS_BUILTIN(CONFIG_BLK_DEV_UBLK)) ub->dev_info.flags |= UBLK_F_URING_CMD_COMP_IN_TASK; + ub->dev_info.flags |= UBLK_F_CMD_IOCTL_ENCODE; + /* We are not ready to support zero copy */ ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY; @@ -2099,7 +2119,7 @@ static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub, * know if the specified device is created as unprivileged * mode. */ - if (cmd->cmd_op != UBLK_CMD_GET_DEV_INFO2) + if (_IOC_NR(cmd->cmd_op) != UBLK_CMD_GET_DEV_INFO2) return 0; } @@ -2125,7 +2145,7 @@ static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub, dev_path[header->dev_path_len] = 0; ret = -EINVAL; - switch (cmd->cmd_op) { + switch (_IOC_NR(cmd->cmd_op)) { case UBLK_CMD_GET_DEV_INFO: case UBLK_CMD_GET_DEV_INFO2: case UBLK_CMD_GET_QUEUE_AFFINITY: @@ -2164,6 +2184,7 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, { struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; struct ublk_device *ub = NULL; + u32 cmd_op = cmd->cmd_op; int ret = -EINVAL; if (issue_flags & IO_URING_F_NONBLOCK) @@ -2174,22 +2195,22 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, if (!(issue_flags & IO_URING_F_SQE128)) goto out; - if (cmd->cmd_op != UBLK_CMD_ADD_DEV) { + ret = ublk_check_cmd_op(cmd_op); + if (ret) + goto out; + + if (_IOC_NR(cmd_op) != UBLK_CMD_ADD_DEV) { ret = -ENODEV; ub = ublk_get_device_from_id(header->dev_id); if (!ub) goto out; ret = ublk_ctrl_uring_cmd_permission(ub, cmd); - } else { - /* ADD_DEV permission check is done in command handler */ - ret = 0; + if (ret) + goto put_dev; } - if (ret) - goto put_dev; - - switch (cmd->cmd_op) { + switch (_IOC_NR(cmd_op)) { case UBLK_CMD_START_DEV: ret = ublk_ctrl_start_dev(ub, cmd); break; diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index f6238ccc7800..640bf687b94a 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -8,6 +8,9 @@ /* * Admin commands, issued by ublk server, and handled by ublk driver. + * + * Legacy command definition, don't use in new application, and don't + * add new such definition any more */ #define UBLK_CMD_GET_QUEUE_AFFINITY 0x01 #define UBLK_CMD_GET_DEV_INFO 0x02 @@ -21,6 +24,30 @@ #define UBLK_CMD_END_USER_RECOVERY 0x11 #define UBLK_CMD_GET_DEV_INFO2 0x12 +/* Any new ctrl command should encode by __IO*() */ +#define UBLK_U_CMD_GET_QUEUE_AFFINITY \ + _IOR('u', UBLK_CMD_GET_QUEUE_AFFINITY, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_GET_DEV_INFO \ + _IOR('u', UBLK_CMD_GET_DEV_INFO, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_ADD_DEV \ + _IOWR('u', UBLK_CMD_ADD_DEV, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_DEL_DEV \ + _IOWR('u', UBLK_CMD_DEL_DEV, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_START_DEV \ + _IOWR('u', UBLK_CMD_START_DEV, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_STOP_DEV \ + _IOWR('u', UBLK_CMD_STOP_DEV, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_SET_PARAMS \ + _IOWR('u', UBLK_CMD_SET_PARAMS, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_GET_PARAMS \ + _IOR('u', UBLK_CMD_GET_PARAMS, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_START_USER_RECOVERY \ + _IOWR('u', UBLK_CMD_START_USER_RECOVERY, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_END_USER_RECOVERY \ + _IOWR('u', UBLK_CMD_END_USER_RECOVERY, struct ublksrv_ctrl_cmd) +#define UBLK_U_CMD_GET_DEV_INFO2 \ + _IOR('u', UBLK_CMD_GET_DEV_INFO2, struct ublksrv_ctrl_cmd) + /* * IO commands, issued by ublk server, and handled by ublk driver. * @@ -41,10 +68,23 @@ * It is only used if ublksrv set UBLK_F_NEED_GET_DATA flag * while starting a ublk device. */ + +/* + * Legacy IO command definition, don't use in new application, and don't + * add new such definition any more + */ #define UBLK_IO_FETCH_REQ 0x20 #define UBLK_IO_COMMIT_AND_FETCH_REQ 0x21 #define UBLK_IO_NEED_GET_DATA 0x22 +/* Any new IO command should encode by __IOWR() */ +#define UBLK_U_IO_FETCH_REQ \ + _IOWR('u', UBLK_IO_FETCH_REQ, struct ublksrv_io_cmd) +#define UBLK_U_IO_COMMIT_AND_FETCH_REQ \ + _IOWR('u', UBLK_IO_COMMIT_AND_FETCH_REQ, struct ublksrv_io_cmd) +#define UBLK_U_IO_NEED_GET_DATA \ + _IOWR('u', UBLK_IO_NEED_GET_DATA, struct ublksrv_io_cmd) + /* only ABORT means that no re-fetch */ #define UBLK_IO_RES_OK 0 #define UBLK_IO_RES_NEED_GET_DATA 1 @@ -102,6 +142,9 @@ */ #define UBLK_F_UNPRIVILEGED_DEV (1UL << 5) +/* use ioctl encoding for uring command */ +#define UBLK_F_CMD_IOCTL_ENCODE (1UL << 6) + /* device state */ #define UBLK_S_DEV_DEAD 0 #define UBLK_S_DEV_LIVE 1 -- cgit v1.2.3 From 63f8793ee60513a09f110ea460a6ff2c33811cdb Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Sun, 16 Apr 2023 15:03:39 -0700 Subject: null_blk: Always check queue mode setting from configfs Make sure to check device queue mode in the null_validate_conf() and return error for NULL_Q_RQ as we don't allow legacy I/O path, without this patch we get OOPs when queue mode is set to 1 from configfs, following are repro steps :- modprobe null_blk nr_devices=0 mkdir config/nullb/nullb0 echo 1 > config/nullb/nullb0/memory_backed echo 4096 > config/nullb/nullb0/blocksize echo 20480 > config/nullb/nullb0/size echo 1 > config/nullb/nullb0/queue_mode echo 1 > config/nullb/nullb0/power Entering kdb (current=0xffff88810acdd080, pid 2372) on processor 42 Oops: (null) due to oops @ 0xffffffffc041c329 CPU: 42 PID: 2372 Comm: sh Tainted: G O N 6.3.0-rc5lblk+ #5 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 RIP: 0010:null_add_dev.part.0+0xd9/0x720 [null_blk] Code: 01 00 00 85 d2 0f 85 a1 03 00 00 48 83 bb 08 01 00 00 00 0f 85 f7 03 00 00 80 bb 62 01 00 00 00 48 8b 75 20 0f 85 6d 02 00 00 <48> 89 6e 60 48 8b 75 20 bf 06 00 00 00 e8 f5 37 2c c1 48 8b 75 20 RSP: 0018:ffffc900052cbde0 EFLAGS: 00010246 RAX: 0000000000000001 RBX: ffff88811084d800 RCX: 0000000000000001 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff888100042e00 RBP: ffff8881053d8200 R08: ffffc900052cbd68 R09: ffff888105db2000 R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000002 R13: ffff888104765200 R14: ffff88810eec1748 R15: ffff88810eec1740 FS: 00007fd445fd1740(0000) GS:ffff8897dfc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000060 CR3: 0000000166a00000 CR4: 0000000000350ee0 DR0: ffffffff8437a488 DR1: ffffffff8437a489 DR2: ffffffff8437a48a DR3: ffffffff8437a48b DR6: 00000000ffff0ff0 DR7: 0000000000000400 Call Trace: nullb_device_power_store+0xd1/0x120 [null_blk] configfs_write_iter+0xb4/0x120 vfs_write+0x2ba/0x3c0 ksys_write+0x5f/0xe0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc RIP: 0033:0x7fd4460c57a7 Code: 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24 RSP: 002b:00007ffd3792a4a8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007fd4460c57a7 RDX: 0000000000000002 RSI: 000055b43c02e4c0 RDI: 0000000000000001 RBP: 000055b43c02e4c0 R08: 000000000000000a R09: 00007fd44615b4e0 R10: 00007fd44615b3e0 R11: 0000000000000246 R12: 0000000000000002 R13: 00007fd446198520 R14: 0000000000000002 R15: 00007fd446198700 Signed-off-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Ming Lei Reviewed-by: Nitesh Shetty Link: https://lore.kernel.org/r/20230416220339.43845-1-kch@nvidia.com Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/block') diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 9a7e5699369e..fcb61f1d5437 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -2001,6 +2001,11 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) static int null_validate_conf(struct nullb_device *dev) { + if (dev->queue_mode == NULL_Q_RQ) { + pr_err("legacy IO path is no longer available\n"); + return -EINVAL; + } + dev->blocksize = round_down(dev->blocksize, 512); dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096); -- cgit v1.2.3 From 7c75661c42a06fe35e1774373194f646b5a9e5c9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 20 Apr 2023 17:11:04 +0800 Subject: ublk: don't return 0 in case of any failure Commit 2d786e66c966 ("block: ublk: switch to ioctl command encoding") starts to reset local variable of 'ret' as zero, then if any failure happens when handling the three IO commands, 0 can be returned to ublk server. Fix it by returning -EINVAL in case of command handling failure. Cc: Christoph Hellwig Fixes: 2d786e66c966 ("block: ublk: switch to ioctl command encoding") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20230420091104.1092972-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/block') diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 5da5876a4443..253008b2091d 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1315,6 +1315,7 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) if (ret) goto out; + ret = -EINVAL; switch (_IOC_NR(cmd_op)) { case UBLK_IO_FETCH_REQ: /* UBLK_IO_FETCH_REQ is only allowed before queue is setup */ -- cgit v1.2.3 From 55793ea54d77719a071b1ccc05a05056e3b5e009 Mon Sep 17 00:00:00 2001 From: Zhong Jinghua Date: Mon, 6 Feb 2023 22:58:05 +0800 Subject: nbd: fix incomplete validation of ioctl arg We tested and found an alarm caused by nbd_ioctl arg without verification. The UBSAN warning calltrace like below: UBSAN: Undefined behaviour in fs/buffer.c:1709:35 signed integer overflow: -9223372036854775808 - 1 cannot be represented in type 'long long int' CPU: 3 PID: 2523 Comm: syz-executor.0 Not tainted 4.19.90 #1 Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace+0x0/0x3f0 arch/arm64/kernel/time.c:78 show_stack+0x28/0x38 arch/arm64/kernel/traps.c:158 __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x170/0x1dc lib/dump_stack.c:118 ubsan_epilogue+0x18/0xb4 lib/ubsan.c:161 handle_overflow+0x188/0x1dc lib/ubsan.c:192 __ubsan_handle_sub_overflow+0x34/0x44 lib/ubsan.c:206 __block_write_full_page+0x94c/0xa20 fs/buffer.c:1709 block_write_full_page+0x1f0/0x280 fs/buffer.c:2934 blkdev_writepage+0x34/0x40 fs/block_dev.c:607 __writepage+0x68/0xe8 mm/page-writeback.c:2305 write_cache_pages+0x44c/0xc70 mm/page-writeback.c:2240 generic_writepages+0xdc/0x148 mm/page-writeback.c:2329 blkdev_writepages+0x2c/0x38 fs/block_dev.c:2114 do_writepages+0xd4/0x250 mm/page-writeback.c:2344 The reason for triggering this warning is __block_write_full_page() -> i_size_read(inode) - 1 overflow. inode->i_size is assigned in __nbd_ioctl() -> nbd_set_size() -> bytesize. We think it is necessary to limit the size of arg to prevent errors. Moreover, __nbd_ioctl() -> nbd_add_socket(), arg will be cast to int. Assuming the value of arg is 0x80000000000000001) (on a 64-bit machine), it will become 1 after the coercion, which will return unexpected results. Fix it by adding checks to prevent passing in too large numbers. Signed-off-by: Zhong Jinghua Reviewed-by: Yu Kuai Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20230206145805.2645671-1-zhongjinghua@huawei.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/block') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index c0b1611b9665..d445fd0934bd 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -325,6 +325,9 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, if (blk_validate_block_size(blksize)) return -EINVAL; + if (bytesize < 0) + return -EINVAL; + nbd->config->bytesize = bytesize; nbd->config->blksize_bits = __ffs(blksize); @@ -1111,6 +1114,9 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, struct nbd_sock *nsock; int err; + /* Arg will be cast to int, check it to avoid overflow */ + if (arg > INT_MAX) + return -EINVAL; sock = nbd_get_socket(nbd, arg, &err); if (!sock) return err; -- cgit v1.2.3