diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-04-26 12:52:58 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-04-26 12:52:58 -0700 |
commit | 9dd6956b38923dc1b7b349ca1eee3c0bb1f0163a (patch) | |
tree | c70bb7d65a50a51686378b6113a8663e0e60d9b8 /drivers/block | |
parent | 5b9a7bb72fddbc5247f56ede55d485fab7abdf92 (diff) | |
parent | 55793ea54d77719a071b1ccc05a05056e3b5e009 (diff) | |
download | lwn-9dd6956b38923dc1b7b349ca1eee3c0bb1f0163a.tar.gz lwn-9dd6956b38923dc1b7b349ca1eee3c0bb1f0163a.zip |
Merge tag 'for-6.4/block-2023-04-21' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- drbd patches, bringing us closer to unifying the out-of-tree version
and the in tree one (Andreas, Christoph)
- support for auto-quiesce for the s390 dasd driver (Stefan)
- MD pull request via Song:
- md/bitmap: Optimal last page size (Jon Derrick)
- Various raid10 fixes (Yu Kuai, Li Nan)
- md: add error_handlers for raid0 and linear (Mariusz Tkaczyk)
- NVMe pull request via Christoph:
- Drop redundant pci_enable_pcie_error_reporting (Bjorn Helgaas)
- Validate nvmet module parameters (Chaitanya Kulkarni)
- Fence TCP socket on receive error (Chris Leech)
- Fix async event trace event (Keith Busch)
- Minor cleanups (Chaitanya Kulkarni, zhenwei pi)
- Fix and cleanup nvmet Identify handling (Damien Le Moal,
Christoph Hellwig)
- Fix double blk_mq_complete_request race in the timeout handler
(Lei Yin)
- Fix irq locking in nvme-fcloop (Ming Lei)
- Remove queue mapping helper for rdma devices (Sagi Grimberg)
- use structured request attribute checks for nbd (Jakub)
- fix blk-crypto race conditions between keyslot management (Eric)
- add sed-opal support for reading read locking range attributes
(Ondrej)
- make fault injection configurable for null_blk (Akinobu)
- clean up the request insertion API (Christoph)
- clean up the queue running API (Christoph)
- blkg config helper cleanups (Tejun)
- lazy init support for blk-iolatency (Tejun)
- various fixes and tweaks to ublk (Ming)
- remove hybrid polling. It hasn't really been useful since we got
async polled IO support, and these days we don't support sync polled
IO at all (Keith)
- misc fixes, cleanups, improvements (Zhong, Ondrej, Colin, Chengming,
Chaitanya, me)
* tag 'for-6.4/block-2023-04-21' of git://git.kernel.dk/linux: (118 commits)
nbd: fix incomplete validation of ioctl arg
ublk: don't return 0 in case of any failure
sed-opal: geometry feature reporting command
null_blk: Always check queue mode setting from configfs
block: ublk: switch to ioctl command encoding
blk-mq: fix the blk_mq_add_to_requeue_list call in blk_kick_flush
block, bfq: Fix division by zero error on zero wsum
fault-inject: fix build error when FAULT_INJECTION_CONFIGFS=y and CONFIGFS_FS=m
block: store bdev->bd_disk->fops->submit_bio state in bdev
block: re-arrange the struct block_device fields for better layout
md/raid5: remove unused working_disks variable
md/raid10: don't call bio_start_io_acct twice for bio which experienced read error
md/raid10: fix memleak of md thread
md/raid10: fix memleak for 'conf->bio_split'
md/raid10: fix leak of 'r10bio->remaining' for recovery
md/raid10: don't BUG_ON() in raise_barrier()
md: fix soft lockup in status_resync
md: add error_handlers for raid0 and linear
md: Use optimal I/O size for last bitmap page
md: Fix types in sb writer
...
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/Kconfig | 17 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_actlog.c | 13 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_bitmap.c | 13 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 120 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 72 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_nl.c | 19 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 102 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_req.c | 30 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_req.h | 11 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_state.c | 29 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_worker.c | 114 | ||||
-rw-r--r-- | drivers/block/nbd.c | 15 | ||||
-rw-r--r-- | drivers/block/null_blk/Kconfig | 2 | ||||
-rw-r--r-- | drivers/block/null_blk/main.c | 135 | ||||
-rw-r--r-- | drivers/block/null_blk/null_blk.h | 7 | ||||
-rw-r--r-- | drivers/block/ublk_drv.c | 133 |
16 files changed, 491 insertions, 341 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index f79f20430ef7..5b9d4aaebb81 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -385,6 +385,23 @@ config BLK_DEV_UBLK can handle batch more effectively, but task_work_add() isn't exported for module, so ublk has to be built to kernel. +config BLKDEV_UBLK_LEGACY_OPCODES + bool "Support legacy command opcode" + depends on BLK_DEV_UBLK + default y + help + ublk driver started to take plain command encoding, which turns out + one bad way. The traditional ioctl command opcode encodes more + info and basically defines each code uniquely, so opcode conflict + is avoided, and driver can handle wrong command easily, meantime it + may help security subsystem to audit io_uring command. + + Say Y if your application still uses legacy command opcode. + + Say N if you don't want to support legacy command opcode. It is + suggested to enable N if your application(ublk server) switches to + ioctl command encoding. + source "drivers/block/rnbd/Kconfig" endif # BLK_DEV diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 429255876800..64b3a1c76f03 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -735,8 +735,9 @@ static bool update_rs_extent(struct drbd_device *device, return false; } -void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go) +void drbd_advance_rs_marks(struct drbd_peer_device *peer_device, unsigned long still_to_go) { + struct drbd_device *device = peer_device->device; unsigned long now = jiffies; unsigned long last = device->rs_mark_time[device->rs_last_mark]; int next = (device->rs_last_mark + 1) % DRBD_SYNC_MARKS; @@ -819,7 +820,7 @@ static int update_sync_bits(struct drbd_device *device, if (mode == SET_IN_SYNC) { unsigned long still_to_go = drbd_bm_total_weight(device); bool rs_is_done = (still_to_go <= device->rs_failed); - drbd_advance_rs_marks(device, still_to_go); + drbd_advance_rs_marks(first_peer_device(device), still_to_go); if (cleared || rs_is_done) maybe_schedule_on_disk_bitmap_update(device, rs_is_done); } else if (mode == RECORD_RS_FAILED) @@ -843,10 +844,11 @@ static bool plausible_request_size(int size) * called by worker on C_SYNC_TARGET and receiver on SyncSource. * */ -int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size, +int __drbd_change_sync(struct drbd_peer_device *peer_device, sector_t sector, int size, enum update_sync_bits_mode mode) { /* Is called from worker and receiver context _only_ */ + struct drbd_device *device = peer_device->device; unsigned long sbnr, ebnr, lbnr; unsigned long count = 0; sector_t esector, nr_sectors; @@ -1009,14 +1011,15 @@ retry: * tries to set it to BME_LOCKED. Returns 0 upon success, and -EAGAIN * if there is still application IO going on in this area. */ -int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector) +int drbd_try_rs_begin_io(struct drbd_peer_device *peer_device, sector_t sector) { + struct drbd_device *device = peer_device->device; unsigned int enr = BM_SECT_TO_EXT(sector); const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT; struct lc_element *e; struct bm_extent *bm_ext; int i; - bool throttle = drbd_rs_should_slow_down(device, sector, true); + bool throttle = drbd_rs_should_slow_down(peer_device, sector, true); /* If we need to throttle, a half-locked (only marked BME_NO_WRITES, * not yet BME_LOCKED) extent needs to be kicked out explicitly if we diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 289876ffbc31..6ac8c54b44c7 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1216,7 +1216,9 @@ static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned * drbd_bm_read() - Read the whole bitmap from its on disk location. * @device: DRBD device. */ -int drbd_bm_read(struct drbd_device *device) __must_hold(local) +int drbd_bm_read(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local) + { return bm_rw(device, BM_AIO_READ, 0); } @@ -1227,7 +1229,8 @@ int drbd_bm_read(struct drbd_device *device) __must_hold(local) * * Will only write pages that have changed since last IO. */ -int drbd_bm_write(struct drbd_device *device) __must_hold(local) +int drbd_bm_write(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local) { return bm_rw(device, 0, 0); } @@ -1238,7 +1241,8 @@ int drbd_bm_write(struct drbd_device *device) __must_hold(local) * * Will write all pages. */ -int drbd_bm_write_all(struct drbd_device *device) __must_hold(local) +int drbd_bm_write_all(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local) { return bm_rw(device, BM_AIO_WRITE_ALL_PAGES, 0); } @@ -1264,7 +1268,8 @@ int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_ho * verify is aborted due to a failed peer disk, while local IO continues, or * pending resync acks are still being processed. */ -int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local) +int drbd_bm_write_copy_pages(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local) { return bm_rw(device, BM_AIO_COPY_PAGES, 0); } diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index d89b7d03d4c8..a30a5ed811be 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -66,6 +66,7 @@ extern int drbd_proc_details; struct drbd_device; struct drbd_connection; +struct drbd_peer_device; /* Defines to control fault insertion */ enum { @@ -126,8 +127,8 @@ struct bm_xfer_ctx { unsigned bytes[2]; }; -extern void INFO_bm_xfer_stats(struct drbd_device *device, - const char *direction, struct bm_xfer_ctx *c); +extern void INFO_bm_xfer_stats(struct drbd_peer_device *peer_device, + const char *direction, struct bm_xfer_ctx *c); static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c) { @@ -541,9 +542,10 @@ struct drbd_md_io { struct bm_io_work { struct drbd_work w; + struct drbd_peer_device *peer_device; char *why; enum bm_flag flags; - int (*io_fn)(struct drbd_device *device); + int (*io_fn)(struct drbd_device *device, struct drbd_peer_device *peer_device); void (*done)(struct drbd_device *device, int rv); }; @@ -1041,7 +1043,7 @@ extern int drbd_send_drequest_csum(struct drbd_peer_device *, sector_t sector, enum drbd_packet cmd); extern int drbd_send_ov_request(struct drbd_peer_device *, sector_t sector, int size); -extern int drbd_send_bitmap(struct drbd_device *device); +extern int drbd_send_bitmap(struct drbd_device *device, struct drbd_peer_device *peer_device); extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode); extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode); extern int drbd_send_rs_deallocated(struct drbd_peer_device *, struct drbd_peer_request *); @@ -1065,17 +1067,22 @@ extern void drbd_md_clear_flag(struct drbd_device *device, int flags)__must_hold extern int drbd_md_test_flag(struct drbd_backing_dev *, int); extern void drbd_md_mark_dirty(struct drbd_device *device); extern void drbd_queue_bitmap_io(struct drbd_device *device, - int (*io_fn)(struct drbd_device *), + int (*io_fn)(struct drbd_device *, struct drbd_peer_device *), void (*done)(struct drbd_device *, int), - char *why, enum bm_flag flags); + char *why, enum bm_flag flags, + struct drbd_peer_device *peer_device); extern int drbd_bitmap_io(struct drbd_device *device, - int (*io_fn)(struct drbd_device *), - char *why, enum bm_flag flags); + int (*io_fn)(struct drbd_device *, struct drbd_peer_device *), + char *why, enum bm_flag flags, + struct drbd_peer_device *peer_device); extern int drbd_bitmap_io_from_worker(struct drbd_device *device, - int (*io_fn)(struct drbd_device *), - char *why, enum bm_flag flags); -extern int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local); -extern int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local); + int (*io_fn)(struct drbd_device *, struct drbd_peer_device *), + char *why, enum bm_flag flags, + struct drbd_peer_device *peer_device); +extern int drbd_bmio_set_n_write(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local); +extern int drbd_bmio_clear_n_write(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local); /* Meta data layout * @@ -1284,14 +1291,18 @@ extern void _drbd_bm_set_bits(struct drbd_device *device, const unsigned long s, const unsigned long e); extern int drbd_bm_test_bit(struct drbd_device *device, unsigned long bitnr); extern int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr); -extern int drbd_bm_read(struct drbd_device *device) __must_hold(local); +extern int drbd_bm_read(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local); extern void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr); -extern int drbd_bm_write(struct drbd_device *device) __must_hold(local); +extern int drbd_bm_write(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local); extern void drbd_bm_reset_al_hints(struct drbd_device *device) __must_hold(local); extern int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local); extern int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local); -extern int drbd_bm_write_all(struct drbd_device *device) __must_hold(local); -extern int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local); +extern int drbd_bm_write_all(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local); +extern int drbd_bm_write_copy_pages(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local); extern size_t drbd_bm_words(struct drbd_device *device); extern unsigned long drbd_bm_bits(struct drbd_device *device); extern sector_t drbd_bm_capacity(struct drbd_device *device); @@ -1422,21 +1433,24 @@ void drbd_resync_after_changed(struct drbd_device *device); extern void drbd_start_resync(struct drbd_device *device, enum drbd_conns side); extern void resume_next_sg(struct drbd_device *device); extern void suspend_other_sg(struct drbd_device *device); -extern int drbd_resync_finished(struct drbd_device *device); +extern int drbd_resync_finished(struct drbd_peer_device *peer_device); /* maybe rather drbd_main.c ? */ extern void *drbd_md_get_buffer(struct drbd_device *device, const char *intent); extern void drbd_md_put_buffer(struct drbd_device *device); extern int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bdev, sector_t sector, enum req_op op); -extern void drbd_ov_out_of_sync_found(struct drbd_device *, sector_t, int); +extern void drbd_ov_out_of_sync_found(struct drbd_peer_device *peer_device, + sector_t sector, int size); extern void wait_until_done_or_force_detached(struct drbd_device *device, struct drbd_backing_dev *bdev, unsigned int *done); -extern void drbd_rs_controller_reset(struct drbd_device *device); +extern void drbd_rs_controller_reset(struct drbd_peer_device *peer_device); -static inline void ov_out_of_sync_print(struct drbd_device *device) +static inline void ov_out_of_sync_print(struct drbd_peer_device *peer_device) { + struct drbd_device *device = peer_device->device; + if (device->ov_last_oos_size) { - drbd_err(device, "Out of sync: start=%llu, size=%lu (sectors)\n", + drbd_err(peer_device, "Out of sync: start=%llu, size=%lu (sectors)\n", (unsigned long long)device->ov_last_oos_start, (unsigned long)device->ov_last_oos_size); } @@ -1475,7 +1489,7 @@ extern int drbd_ack_receiver(struct drbd_thread *thi); extern void drbd_send_ping_wf(struct work_struct *ws); extern void drbd_send_acks_wf(struct work_struct *ws); extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device); -extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector, +extern bool drbd_rs_should_slow_down(struct drbd_peer_device *peer_device, sector_t sector, bool throttle_if_app_is_waiting); extern int drbd_submit_peer_request(struct drbd_peer_request *peer_req); extern int drbd_free_peer_reqs(struct drbd_device *, struct list_head *); @@ -1531,22 +1545,22 @@ extern void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i extern void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i); extern void drbd_rs_complete_io(struct drbd_device *device, sector_t sector); extern int drbd_rs_begin_io(struct drbd_device *device, sector_t sector); -extern int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector); +extern int drbd_try_rs_begin_io(struct drbd_peer_device *peer_device, sector_t sector); extern void drbd_rs_cancel_all(struct drbd_device *device); extern int drbd_rs_del_all(struct drbd_device *device); -extern void drbd_rs_failed_io(struct drbd_device *device, +extern void drbd_rs_failed_io(struct drbd_peer_device *peer_device, sector_t sector, int size); -extern void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go); +extern void drbd_advance_rs_marks(struct drbd_peer_device *peer_device, unsigned long still_to_go); enum update_sync_bits_mode { RECORD_RS_FAILED, SET_OUT_OF_SYNC, SET_IN_SYNC }; -extern int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size, +extern int __drbd_change_sync(struct drbd_peer_device *peer_device, sector_t sector, int size, enum update_sync_bits_mode mode); -#define drbd_set_in_sync(device, sector, size) \ - __drbd_change_sync(device, sector, size, SET_IN_SYNC) -#define drbd_set_out_of_sync(device, sector, size) \ - __drbd_change_sync(device, sector, size, SET_OUT_OF_SYNC) -#define drbd_rs_failed_io(device, sector, size) \ - __drbd_change_sync(device, sector, size, RECORD_RS_FAILED) +#define drbd_set_in_sync(peer_device, sector, size) \ + __drbd_change_sync(peer_device, sector, size, SET_IN_SYNC) +#define drbd_set_out_of_sync(peer_device, sector, size) \ + __drbd_change_sync(peer_device, sector, size, SET_OUT_OF_SYNC) +#define drbd_rs_failed_io(peer_device, sector, size) \ + __drbd_change_sync(peer_device, sector, size, RECORD_RS_FAILED) extern void drbd_al_shrink(struct drbd_device *device); extern int drbd_al_initialize(struct drbd_device *, void *); @@ -1918,18 +1932,14 @@ static inline void inc_ap_pending(struct drbd_device *device) atomic_inc(&device->ap_pending_cnt); } -#define ERR_IF_CNT_IS_NEGATIVE(which, func, line) \ - if (atomic_read(&device->which) < 0) \ - drbd_err(device, "in %s:%d: " #which " = %d < 0 !\n", \ - func, line, \ - atomic_read(&device->which)) - -#define dec_ap_pending(device) _dec_ap_pending(device, __func__, __LINE__) -static inline void _dec_ap_pending(struct drbd_device *device, const char *func, int line) +#define dec_ap_pending(device) ((void)expect((device), __dec_ap_pending(device) >= 0)) +static inline int __dec_ap_pending(struct drbd_device *device) { - if (atomic_dec_and_test(&device->ap_pending_cnt)) + int ap_pending_cnt = atomic_dec_return(&device->ap_pending_cnt); + + if (ap_pending_cnt == 0) wake_up(&device->misc_wait); - ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt, func, line); + return ap_pending_cnt; } /* counts how many resync-related answers we still expect from the peer @@ -1938,16 +1948,16 @@ static inline void _dec_ap_pending(struct drbd_device *device, const char *func, * C_SYNC_SOURCE sends P_RS_DATA_REPLY (and expects P_WRITE_ACK with ID_SYNCER) * (or P_NEG_ACK with ID_SYNCER) */ -static inline void inc_rs_pending(struct drbd_device *device) +static inline void inc_rs_pending(struct drbd_peer_device *peer_device) { - atomic_inc(&device->rs_pending_cnt); + atomic_inc(&peer_device->device->rs_pending_cnt); } -#define dec_rs_pending(device) _dec_rs_pending(device, __func__, __LINE__) -static inline void _dec_rs_pending(struct drbd_device *device, const char *func, int line) +#define dec_rs_pending(peer_device) \ + ((void)expect((peer_device), __dec_rs_pending(peer_device) >= 0)) +static inline int __dec_rs_pending(struct drbd_peer_device *peer_device) { - atomic_dec(&device->rs_pending_cnt); - ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt, func, line); + return atomic_dec_return(&peer_device->device->rs_pending_cnt); } /* counts how many answers we still need to send to the peer. @@ -1964,18 +1974,16 @@ static inline void inc_unacked(struct drbd_device *device) atomic_inc(&device->unacked_cnt); } -#define dec_unacked(device) _dec_unacked(device, __func__, __LINE__) -static inline void _dec_unacked(struct drbd_device *device, const char *func, int line) +#define dec_unacked(device) ((void)expect(device, __dec_unacked(device) >= 0)) +static inline int __dec_unacked(struct drbd_device *device) { - atomic_dec(&device->unacked_cnt); - ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); + return atomic_dec_return(&device->unacked_cnt); } -#define sub_unacked(device, n) _sub_unacked(device, n, __func__, __LINE__) -static inline void _sub_unacked(struct drbd_device *device, int n, const char *func, int line) +#define sub_unacked(device, n) ((void)expect(device, __sub_unacked(device) >= 0)) +static inline int __sub_unacked(struct drbd_device *device, int n) { - atomic_sub(n, &device->unacked_cnt); - ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); + return atomic_sub_return(n, &device->unacked_cnt); } static inline bool is_sync_target_state(enum drbd_conns connection_state) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2c764f7ee4a7..83987e7a5ef2 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -231,9 +231,11 @@ void tl_release(struct drbd_connection *connection, unsigned int barrier_nr, } req = list_prepare_entry(tmp, &connection->transfer_log, tl_requests); list_for_each_entry_safe_from(req, r, &connection->transfer_log, tl_requests) { + struct drbd_peer_device *peer_device; if (req->epoch != expect_epoch) break; - _req_mod(req, BARRIER_ACKED); + peer_device = conn_peer_device(connection, req->device->vnr); + _req_mod(req, BARRIER_ACKED, peer_device); } spin_unlock_irq(&connection->resource->req_lock); @@ -256,10 +258,13 @@ bail: /* must hold resource->req_lock */ void _tl_restart(struct drbd_connection *connection, enum drbd_req_event what) { + struct drbd_peer_device *peer_device; struct drbd_request *req, *r; - list_for_each_entry_safe(req, r, &connection->transfer_log, tl_requests) - _req_mod(req, what); + list_for_each_entry_safe(req, r, &connection->transfer_log, tl_requests) { + peer_device = conn_peer_device(connection, req->device->vnr); + _req_mod(req, what, peer_device); + } } void tl_restart(struct drbd_connection *connection, enum drbd_req_event what) @@ -297,7 +302,7 @@ void tl_abort_disk_io(struct drbd_device *device) continue; if (req->device != device) continue; - _req_mod(req, ABORT_DISK_IO); + _req_mod(req, ABORT_DISK_IO, NULL); } spin_unlock_irq(&connection->resource->req_lock); } @@ -1198,10 +1203,11 @@ static int fill_bitmap_rle_bits(struct drbd_device *device, * code upon failure. */ static int -send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c) +send_bitmap_rle_or_plain(struct drbd_peer_device *peer_device, struct bm_xfer_ctx *c) { - struct drbd_socket *sock = &first_peer_device(device)->connection->data; - unsigned int header_size = drbd_header_size(first_peer_device(device)->connection); + struct drbd_device *device = peer_device->device; + struct drbd_socket *sock = &peer_device->connection->data; + unsigned int header_size = drbd_header_size(peer_device->connection); struct p_compressed_bm *p = sock->sbuf + header_size; int len, err; @@ -1212,7 +1218,7 @@ send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c) if (len) { dcbp_set_code(p, RLE_VLI_Bits); - err = __send_command(first_peer_device(device)->connection, device->vnr, sock, + err = __send_command(peer_device->connection, device->vnr, sock, P_COMPRESSED_BITMAP, sizeof(*p) + len, NULL, 0); c->packets[0]++; @@ -1233,7 +1239,8 @@ send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c) len = num_words * sizeof(*p); if (len) drbd_bm_get_lel(device, c->word_offset, num_words, p); - err = __send_command(first_peer_device(device)->connection, device->vnr, sock, P_BITMAP, len, NULL, 0); + err = __send_command(peer_device->connection, device->vnr, sock, P_BITMAP, + len, NULL, 0); c->word_offset += num_words; c->bit_offset = c->word_offset * BITS_PER_LONG; @@ -1245,7 +1252,7 @@ send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c) } if (!err) { if (len == 0) { - INFO_bm_xfer_stats(device, "send", c); + INFO_bm_xfer_stats(peer_device, "send", c); return 0; } else return 1; @@ -1254,7 +1261,8 @@ send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c) } /* See the comment at receive_bitmap() */ -static int _drbd_send_bitmap(struct drbd_device *device) +static int _drbd_send_bitmap(struct drbd_device *device, + struct drbd_peer_device *peer_device) { struct bm_xfer_ctx c; int err; @@ -1266,7 +1274,7 @@ static int _drbd_send_bitmap(struct drbd_device *device) if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC)) { drbd_info(device, "Writing the whole bitmap, MDF_FullSync was set.\n"); drbd_bm_set_all(device); - if (drbd_bm_write(device)) { + if (drbd_bm_write(device, peer_device)) { /* write_bm did fail! Leave full sync flag set in Meta P_DATA * but otherwise process as per normal - need to tell other * side that a full resync is required! */ @@ -1285,20 +1293,20 @@ static int _drbd_send_bitmap(struct drbd_device *device) }; do { - err = send_bitmap_rle_or_plain(device, &c); + err = send_bitmap_rle_or_plain(peer_device, &c); } while (err > 0); return err == 0; } -int drbd_send_bitmap(struct drbd_device *device) +int drbd_send_bitmap(struct drbd_device *device, struct drbd_peer_device *peer_device) { - struct drbd_socket *sock = &first_peer_device(device)->connection->data; + struct drbd_socket *sock = &peer_device->connection->data; int err = -1; mutex_lock(&sock->mutex); if (sock->socket) - err = !_drbd_send_bitmap(device); + err = !_drbd_send_bitmap(device, peer_device); mutex_unlock(&sock->mutex); return err; } @@ -3406,7 +3414,9 @@ void drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local) * * Sets all bits in the bitmap and writes the whole bitmap to stable storage. */ -int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local) +int drbd_bmio_set_n_write(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local) + { int rv = -EIO; @@ -3414,7 +3424,7 @@ int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local) drbd_md_sync(device); drbd_bm_set_all(device); - rv = drbd_bm_write(device); + rv = drbd_bm_write(device, peer_device); if (!rv) { drbd_md_clear_flag(device, MDF_FULL_SYNC); @@ -3430,11 +3440,13 @@ int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local) * * Clears all bits in the bitmap and writes the whole bitmap to stable storage. */ -int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local) +int drbd_bmio_clear_n_write(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local) + { drbd_resume_al(device); drbd_bm_clear_all(device); - return drbd_bm_write(device); + return drbd_bm_write(device, peer_device); } static int w_bitmap_io(struct drbd_work *w, int unused) @@ -3453,7 +3465,7 @@ static int w_bitmap_io(struct drbd_work *w, int unused) if (get_ldev(device)) { drbd_bm_lock(device, work->why, work->flags); - rv = work->io_fn(device); + rv = work->io_fn(device, work->peer_device); drbd_bm_unlock(device); put_ldev(device); } @@ -3488,11 +3500,12 @@ static int w_bitmap_io(struct drbd_work *w, int unused) * put_ldev(). */ void drbd_queue_bitmap_io(struct drbd_device *device, - int (*io_fn)(struct drbd_device *), + int (*io_fn)(struct drbd_device *, struct drbd_peer_device *), void (*done)(struct drbd_device *, int), - char *why, enum bm_flag flags) + char *why, enum bm_flag flags, + struct drbd_peer_device *peer_device) { - D_ASSERT(device, current == first_peer_device(device)->connection->worker.task); + D_ASSERT(device, current == peer_device->connection->worker.task); D_ASSERT(device, !test_bit(BITMAP_IO_QUEUED, &device->flags)); D_ASSERT(device, !test_bit(BITMAP_IO, &device->flags)); @@ -3501,6 +3514,7 @@ void drbd_queue_bitmap_io(struct drbd_device *device, drbd_err(device, "FIXME going to queue '%s' but '%s' still pending?\n", why, device->bm_io_work.why); + device->bm_io_work.peer_device = peer_device; device->bm_io_work.io_fn = io_fn; device->bm_io_work.done = done; device->bm_io_work.why = why; @@ -3512,7 +3526,7 @@ void drbd_queue_bitmap_io(struct drbd_device *device, * application IO does not conflict anyways. */ if (flags == BM_LOCKED_CHANGE_ALLOWED || atomic_read(&device->ap_bio_cnt) == 0) { if (!test_and_set_bit(BITMAP_IO_QUEUED, &device->flags)) - drbd_queue_work(&first_peer_device(device)->connection->sender_work, + drbd_queue_work(&peer_device->connection->sender_work, &device->bm_io_work.w); } spin_unlock_irq(&device->resource->req_lock); @@ -3528,8 +3542,10 @@ void drbd_queue_bitmap_io(struct drbd_device *device, * freezes application IO while that the actual IO operations runs. This * functions MAY NOT be called from worker context. */ -int drbd_bitmap_io(struct drbd_device *device, int (*io_fn)(struct drbd_device *), - char *why, enum bm_flag flags) +int drbd_bitmap_io(struct drbd_device *device, + int (*io_fn)(struct drbd_device *, struct drbd_peer_device *), + char *why, enum bm_flag flags, + struct drbd_peer_device *peer_device) { /* Only suspend io, if some operation is supposed to be locked out */ const bool do_suspend_io = flags & (BM_DONT_CLEAR|BM_DONT_SET|BM_DONT_TEST); @@ -3541,7 +3557,7 @@ int drbd_bitmap_io(struct drbd_device *device, int (*io_fn)(struct drbd_device * drbd_suspend_io(device); drbd_bm_lock(device, why, flags); - rv = io_fn(device); + rv = io_fn(device, peer_device); drbd_bm_unlock(device); if (do_suspend_io) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index f49f2a5282e1..1a5d3d72d91d 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1053,7 +1053,7 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct la_size_changed ? "size changed" : "md moved"); /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */ drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write, - "size changed", BM_LOCKED_MASK); + "size changed", BM_LOCKED_MASK, NULL); /* on-disk bitmap and activity log is authoritative again * (unless there was an IO error meanwhile...) */ @@ -2027,13 +2027,15 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) drbd_info(device, "Assuming that all blocks are out of sync " "(aka FullSync)\n"); if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, - "set_n_write from attaching", BM_LOCKED_MASK)) { + "set_n_write from attaching", BM_LOCKED_MASK, + NULL)) { retcode = ERR_IO_MD_DISK; goto force_diskless_dec; } } else { if (drbd_bitmap_io(device, &drbd_bm_read, - "read from attaching", BM_LOCKED_MASK)) { + "read from attaching", BM_LOCKED_MASK, + NULL)) { retcode = ERR_IO_MD_DISK; goto force_diskless_dec; } @@ -2972,7 +2974,7 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT)); if (retcode >= SS_SUCCESS) { if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, - "set_n_write from invalidate", BM_LOCKED_MASK)) + "set_n_write from invalidate", BM_LOCKED_MASK, NULL)) retcode = ERR_IO_MD_DISK; } } else @@ -3005,11 +3007,12 @@ out: return 0; } -static int drbd_bmio_set_susp_al(struct drbd_device *device) __must_hold(local) +static int drbd_bmio_set_susp_al(struct drbd_device *device, + struct drbd_peer_device *peer_device) __must_hold(local) { int rv; - rv = drbd_bmio_set_n_write(device); + rv = drbd_bmio_set_n_write(device, peer_device); drbd_suspend_al(device); return rv; } @@ -3052,7 +3055,7 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) if (retcode >= SS_SUCCESS) { if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al, "set_n_write from invalidate_peer", - BM_LOCKED_SET_ALLOWED)) + BM_LOCKED_SET_ALLOWED, NULL)) retcode = ERR_IO_MD_DISK; } } else @@ -4148,7 +4151,7 @@ int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info) if (args.clear_bm) { err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write, - "clear_n_write from new_c_uuid", BM_LOCKED_MASK); + "clear_n_write from new_c_uuid", BM_LOCKED_MASK, NULL); if (err) { drbd_err(device, "Writing bitmap failed with %d\n", err); retcode = ERR_IO_MD_DISK; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e197b2a465d2..719a7260c22b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2044,11 +2044,11 @@ static int e_end_resync_block(struct drbd_work *w, int unused) D_ASSERT(device, drbd_interval_empty(&peer_req->i)); if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { - drbd_set_in_sync(device, sector, peer_req->i.size); + drbd_set_in_sync(peer_device, sector, peer_req->i.size); err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req); } else { /* Record failure to sync */ - drbd_rs_failed_io(device, sector, peer_req->i.size); + drbd_rs_failed_io(peer_device, sector, peer_req->i.size); err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req); } @@ -2067,7 +2067,7 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t secto if (!peer_req) goto fail; - dec_rs_pending(device); + dec_rs_pending(peer_device); inc_unacked(device); /* corresponding dec_unacked() in e_end_resync_block() @@ -2138,7 +2138,7 @@ static int receive_DataReply(struct drbd_connection *connection, struct packet_i err = recv_dless_read(peer_device, req, sector, pi->size); if (!err) - req_mod(req, DATA_RECEIVED); + req_mod(req, DATA_RECEIVED, peer_device); /* else: nothing. handled from drbd_disconnect... * I don't think we may complete this just yet * in case we are "on-disconnect: freeze" */ @@ -2196,7 +2196,7 @@ static void restart_conflicting_writes(struct drbd_device *device, continue; /* as it is RQ_POSTPONED, this will cause it to * be queued on the retry workqueue. */ - __req_mod(req, CONFLICT_RESOLVED, NULL); + __req_mod(req, CONFLICT_RESOLVED, NULL, NULL); } } @@ -2220,7 +2220,7 @@ static int e_end_block(struct drbd_work *w, int cancel) P_RS_WRITE_ACK : P_WRITE_ACK; err = drbd_send_ack(peer_device, pcmd, peer_req); if (pcmd == P_RS_WRITE_ACK) - drbd_set_in_sync(device, sector, peer_req->i.size); + drbd_set_in_sync(peer_device, sector, peer_req->i.size); } else { err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req); /* we expect it to be marked out of sync anyways... @@ -2420,6 +2420,7 @@ static blk_opf_t wire_flags_to_bio(struct drbd_connection *connection, u32 dpf) static void fail_postponed_requests(struct drbd_device *device, sector_t sector, unsigned int size) { + struct drbd_peer_device *peer_device = first_peer_device(device); struct drbd_interval *i; repeat: @@ -2433,7 +2434,7 @@ static void fail_postponed_requests(struct drbd_device *device, sector_t sector, if (!(req->rq_state & RQ_POSTPONED)) continue; req->rq_state &= ~RQ_POSTPONED; - __req_mod(req, NEG_ACKED, &m); + __req_mod(req, NEG_ACKED, peer_device, &m); spin_unlock_irq(&device->resource->req_lock); if (m.bio) complete_master_bio(device, &m); @@ -2690,7 +2691,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * if (device->state.pdsk < D_INCONSISTENT) { /* In case we have the only disk of the cluster, */ - drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size); + drbd_set_out_of_sync(peer_device, peer_req->i.sector, peer_req->i.size); peer_req->flags &= ~EE_MAY_SET_IN_SYNC; drbd_al_begin_io(device, &peer_req->i); peer_req->flags |= EE_CALL_AL_COMPLETE_IO; @@ -2729,9 +2730,10 @@ out_interrupted: * The current sync rate used here uses only the most recent two step marks, * to have a short time average so we can react faster. */ -bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector, +bool drbd_rs_should_slow_down(struct drbd_peer_device *peer_device, sector_t sector, bool throttle_if_app_is_waiting) { + struct drbd_device *device = peer_device->device; struct lc_element *tmp; bool throttle = drbd_rs_c_min_rate_throttle(device); @@ -2843,7 +2845,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet break; case P_OV_REPLY: verb = 0; - dec_rs_pending(device); + dec_rs_pending(peer_device); drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC); break; default: @@ -2914,7 +2916,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet /* track progress, we may need to throttle */ atomic_add(size >> 9, &device->rs_sect_in); peer_req->w.cb = w_e_end_ov_reply; - dec_rs_pending(device); + dec_rs_pending(peer_device); /* drbd_rs_begin_io done when we sent this request, * but accounting still needs to be done. */ goto submit_for_resync; @@ -2977,7 +2979,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet update_receiver_timing_details(connection, drbd_rs_should_slow_down); if (device->state.peer != R_PRIMARY - && drbd_rs_should_slow_down(device, sector, false)) + && drbd_rs_should_slow_down(peer_device, sector, false)) schedule_timeout_uninterruptible(HZ/10); update_receiver_timing_details(connection, drbd_rs_begin_io); if (drbd_rs_begin_io(device, sector)) @@ -3226,10 +3228,11 @@ static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid, -1096 requires proto 96 */ -static int drbd_uuid_compare(struct drbd_device *const device, enum drbd_role const peer_role, int *rule_nr) __must_hold(local) +static int drbd_uuid_compare(struct drbd_peer_device *const peer_device, + enum drbd_role const peer_role, int *rule_nr) __must_hold(local) { - struct drbd_peer_device *const peer_device = first_peer_device(device); - struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL; + struct drbd_connection *const connection = peer_device->connection; + struct drbd_device *device = peer_device->device; u64 self, peer; int i, j; @@ -3465,7 +3468,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device, drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]); - hg = drbd_uuid_compare(device, peer_role, &rule_nr); + hg = drbd_uuid_compare(peer_device, peer_role, &rule_nr); spin_unlock_irq(&device->ldev->md.uuid_lock); drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr); @@ -3591,7 +3594,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device, if (abs(hg) >= 2) { drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n"); if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake", - BM_LOCKED_SET_ALLOWED)) + BM_LOCKED_SET_ALLOWED, NULL)) return C_MASK; } @@ -4270,7 +4273,7 @@ static int receive_uuids(struct drbd_connection *connection, struct packet_info drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n"); drbd_bitmap_io(device, &drbd_bmio_clear_n_write, "clear_n_write from receive_uuids", - BM_LOCKED_TEST_ALLOWED); + BM_LOCKED_TEST_ALLOWED, NULL); _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]); _drbd_uuid_set(device, UI_BITMAP, 0); _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), @@ -4448,7 +4451,7 @@ static int receive_state(struct drbd_connection *connection, struct packet_info else if (os.conn >= C_SYNC_SOURCE && peer_state.conn == C_CONNECTED) { if (drbd_bm_total_weight(device) <= device->rs_failed) - drbd_resync_finished(device); + drbd_resync_finished(peer_device); return 0; } } @@ -4456,8 +4459,8 @@ static int receive_state(struct drbd_connection *connection, struct packet_info /* explicit verify finished notification, stop sector reached. */ if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE && peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) { - ov_out_of_sync_print(device); - drbd_resync_finished(device); + ov_out_of_sync_print(peer_device); + drbd_resync_finished(peer_device); return 0; } @@ -4766,11 +4769,11 @@ decode_bitmap_c(struct drbd_peer_device *peer_device, return -EIO; } -void INFO_bm_xfer_stats(struct drbd_device *device, +void INFO_bm_xfer_stats(struct drbd_peer_device *peer_device, const char *direction, struct bm_xfer_ctx *c) { /* what would it take to transfer it "plaintext" */ - unsigned int header_size = drbd_header_size(first_peer_device(device)->connection); + unsigned int header_size = drbd_header_size(peer_device->connection); unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size; unsigned int plain = header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) + @@ -4794,7 +4797,7 @@ void INFO_bm_xfer_stats(struct drbd_device *device, r = 1000; r = 1000 - r; - drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), " + drbd_info(peer_device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), " "total %u; compression: %u.%u%%\n", direction, c->bytes[1], c->packets[1], @@ -4872,12 +4875,12 @@ static int receive_bitmap(struct drbd_connection *connection, struct packet_info goto out; } - INFO_bm_xfer_stats(device, "receive", &c); + INFO_bm_xfer_stats(peer_device, "receive", &c); if (device->state.conn == C_WF_BITMAP_T) { enum drbd_state_rv rv; - err = drbd_send_bitmap(device); + err = drbd_send_bitmap(device, peer_device); if (err) goto out; /* Omit CS_ORDERED with this state transition to avoid deadlocks. */ @@ -4935,7 +4938,7 @@ static int receive_out_of_sync(struct drbd_connection *connection, struct packet drbd_conn_str(device->state.conn)); } - drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize)); + drbd_set_out_of_sync(peer_device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize)); return 0; } @@ -4956,7 +4959,7 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac sector = be64_to_cpu(p->sector); size = be32_to_cpu(p->blksize); - dec_rs_pending(device); + dec_rs_pending(peer_device); if (get_ldev(device)) { struct drbd_peer_request *peer_req; @@ -5214,7 +5217,7 @@ static int drbd_disconnected(struct drbd_peer_device *peer_device) if (get_ldev(device)) { drbd_bitmap_io(device, &drbd_bm_write_copy_pages, - "write from disconnected", BM_LOCKED_CHANGE_ALLOWED); + "write from disconnected", BM_LOCKED_CHANGE_ALLOWED, NULL); put_ldev(device); } @@ -5648,22 +5651,23 @@ static int got_IsInSync(struct drbd_connection *connection, struct packet_info * if (get_ldev(device)) { drbd_rs_complete_io(device, sector); - drbd_set_in_sync(device, sector, blksize); + drbd_set_in_sync(peer_device, sector, blksize); /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */ device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT); put_ldev(device); } - dec_rs_pending(device); + dec_rs_pending(peer_device); atomic_add(blksize >> 9, &device->rs_sect_in); return 0; } static int -validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector, +validate_req_change_req_state(struct drbd_peer_device *peer_device, u64 id, sector_t sector, struct rb_root *root, const char *func, enum drbd_req_event what, bool missing_ok) { + struct drbd_device *device = peer_device->device; struct drbd_request *req; struct bio_and_error m; @@ -5673,7 +5677,7 @@ validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t secto spin_unlock_irq(&device->resource->req_lock); return -EIO; } - __req_mod(req, what, &m); + __req_mod(req, what, peer_device, &m); spin_unlock_irq(&device->resource->req_lock); if (m.bio) @@ -5698,8 +5702,8 @@ static int got_BlockAck(struct drbd_connection *connection, struct packet_info * update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); if (p->block_id == ID_SYNCER) { - drbd_set_in_sync(device, sector, blksize); - dec_rs_pending(device); + drbd_set_in_sync(peer_device, sector, blksize); + dec_rs_pending(peer_device); return 0; } switch (pi->cmd) { @@ -5722,7 +5726,7 @@ static int got_BlockAck(struct drbd_connection *connection, struct packet_info * BUG(); } - return validate_req_change_req_state(device, p->block_id, sector, + return validate_req_change_req_state(peer_device, p->block_id, sector, &device->write_requests, __func__, what, false); } @@ -5744,12 +5748,12 @@ static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); if (p->block_id == ID_SYNCER) { - dec_rs_pending(device); - drbd_rs_failed_io(device, sector, size); + dec_rs_pending(peer_device); + drbd_rs_failed_io(peer_device, sector, size); return 0; } - err = validate_req_change_req_state(device, p->block_id, sector, + err = validate_req_change_req_state(peer_device, p->block_id, sector, &device->write_requests, __func__, NEG_ACKED, true); if (err) { @@ -5758,7 +5762,7 @@ static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi request is no longer in the collision hash. */ /* In Protocol B we might already have got a P_RECV_ACK but then get a P_NEG_ACK afterwards. */ - drbd_set_out_of_sync(device, sector, size); + drbd_set_out_of_sync(peer_device, sector, size); } return 0; } @@ -5780,7 +5784,7 @@ static int got_NegDReply(struct drbd_connection *connection, struct packet_info drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n", (unsigned long long)sector, be32_to_cpu(p->blksize)); - return validate_req_change_req_state(device, p->block_id, sector, + return validate_req_change_req_state(peer_device, p->block_id, sector, &device->read_requests, __func__, NEG_ACKED, false); } @@ -5803,13 +5807,13 @@ static int got_NegRSDReply(struct drbd_connection *connection, struct packet_inf update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); - dec_rs_pending(device); + dec_rs_pending(peer_device); if (get_ldev_if_state(device, D_FAILED)) { drbd_rs_complete_io(device, sector); switch (pi->cmd) { case P_NEG_RS_DREPLY: - drbd_rs_failed_io(device, sector, size); + drbd_rs_failed_io(peer_device, sector, size); break; case P_RS_CANCEL: break; @@ -5866,21 +5870,21 @@ static int got_OVResult(struct drbd_connection *connection, struct packet_info * update_peer_seq(peer_device, be32_to_cpu(p->seq_num)); if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC) - drbd_ov_out_of_sync_found(device, sector, size); + drbd_ov_out_of_sync_found(peer_device, sector, size); else - ov_out_of_sync_print(device); + ov_out_of_sync_print(peer_device); if (!get_ldev(device)) return 0; drbd_rs_complete_io(device, sector); - dec_rs_pending(device); + dec_rs_pending(peer_device); --device->ov_left; /* let's advance progress step marks only for every other megabyte */ if ((device->ov_left & 0x200) == 0x200) - drbd_advance_rs_marks(device, device->ov_left); + drbd_advance_rs_marks(peer_device, device->ov_left); if (device->ov_left == 0) { dw = kmalloc(sizeof(*dw), GFP_NOIO); @@ -5890,8 +5894,8 @@ static int got_OVResult(struct drbd_connection *connection, struct packet_info * drbd_queue_work(&peer_device->connection->sender_work, &dw->w); } else { drbd_err(device, "kmalloc(dw) failed."); - ov_out_of_sync_print(device); - drbd_resync_finished(device); + ov_out_of_sync_print(peer_device); + drbd_resync_finished(peer_device); } } put_ldev(device); diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index e36216d50753..380e6584a4ee 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -122,12 +122,13 @@ void drbd_req_destroy(struct kref *kref) * before it even was submitted or sent. * In that case we do not want to touch the bitmap at all. */ + struct drbd_peer_device *peer_device = first_peer_device(device); if ((s & (RQ_POSTPONED|RQ_LOCAL_MASK|RQ_NET_MASK)) != RQ_POSTPONED) { if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK)) - drbd_set_out_of_sync(device, req->i.sector, req->i.size); + drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size); if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS)) - drbd_set_in_sync(device, req->i.sector, req->i.size); + drbd_set_in_sync(peer_device, req->i.sector, req->i.size); } /* one might be tempted to move the drbd_al_complete_io @@ -552,12 +553,15 @@ static inline bool is_pending_write_protocol_A(struct drbd_request *req) * happen "atomically" within the req_lock, * and it enforces that we have to think in a very structured manner * about the "events" that may happen to a request during its life time ... + * + * + * peer_device == NULL means local disk */ int __req_mod(struct drbd_request *req, enum drbd_req_event what, + struct drbd_peer_device *peer_device, struct bio_and_error *m) { struct drbd_device *const device = req->device; - struct drbd_peer_device *const peer_device = first_peer_device(device); struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL; struct net_conf *nc; int p, rv = 0; @@ -617,7 +621,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case READ_COMPLETED_WITH_ERROR: - drbd_set_out_of_sync(device, req->i.sector, req->i.size); + drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size); drbd_report_io_error(device, req); __drbd_chk_io_error(device, DRBD_READ_ERROR); fallthrough; @@ -1100,6 +1104,7 @@ static bool drbd_should_send_out_of_sync(union drbd_dev_state s) static int drbd_process_write_request(struct drbd_request *req) { struct drbd_device *device = req->device; + struct drbd_peer_device *peer_device = first_peer_device(device); int remote, send_oos; remote = drbd_should_do_remote(device->state); @@ -1115,7 +1120,7 @@ static int drbd_process_write_request(struct drbd_request *req) /* The only size==0 bios we expect are empty flushes. */ D_ASSERT(device, req->master_bio->bi_opf & REQ_PREFLUSH); if (remote) - _req_mod(req, QUEUE_AS_DRBD_BARRIER); + _req_mod(req, QUEUE_AS_DRBD_BARRIER, peer_device); return remote; } @@ -1125,10 +1130,10 @@ static int drbd_process_write_request(struct drbd_request *req) D_ASSERT(device, !(remote && send_oos)); if (remote) { - _req_mod(req, TO_BE_SENT); - _req_mod(req, QUEUE_FOR_NET_WRITE); - } else if (drbd_set_out_of_sync(device, req->i.sector, req->i.size)) - _req_mod(req, QUEUE_FOR_SEND_OOS); + _req_mod(req, TO_BE_SENT, peer_device); + _req_mod(req, QUEUE_FOR_NET_WRITE, peer_device); + } else if (drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size)) + _req_mod(req, QUEUE_FOR_SEND_OOS, peer_device); return remote; } @@ -1312,6 +1317,7 @@ static void drbd_update_plug(struct drbd_plug_cb *plug, struct drbd_request *req static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req) { struct drbd_resource *resource = device->resource; + struct drbd_peer_device *peer_device = first_peer_device(device); const int rw = bio_data_dir(req->master_bio); struct bio_and_error m = { NULL, }; bool no_remote = false; @@ -1375,8 +1381,8 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request /* We either have a private_bio, or we can read from remote. * Otherwise we had done the goto nodata above. */ if (req->private_bio == NULL) { - _req_mod(req, TO_BE_SENT); - _req_mod(req, QUEUE_FOR_NET_READ); + _req_mod(req, TO_BE_SENT, peer_device); + _req_mod(req, QUEUE_FOR_NET_READ, peer_device); } else no_remote = true; } @@ -1397,7 +1403,7 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request req->pre_submit_jif = jiffies; list_add_tail(&req->req_pending_local, &device->pending_completion[rw == WRITE]); - _req_mod(req, TO_BE_SUBMITTED); + _req_mod(req, TO_BE_SUBMITTED, NULL); /* but we need to give up the spinlock to submit */ submit_private_bio = true; } else if (no_remote) { diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index b4017b5c3fbc..9ae860e7591b 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -267,6 +267,7 @@ struct bio_and_error { extern void start_new_tl_epoch(struct drbd_connection *connection); extern void drbd_req_destroy(struct kref *kref); extern int __req_mod(struct drbd_request *req, enum drbd_req_event what, + struct drbd_peer_device *peer_device, struct bio_and_error *m); extern void complete_master_bio(struct drbd_device *device, struct bio_and_error *m); @@ -280,14 +281,15 @@ extern void drbd_restart_request(struct drbd_request *req); /* use this if you don't want to deal with calling complete_master_bio() * outside the spinlock, e.g. when walking some list on cleanup. */ -static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what) +static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what, + struct drbd_peer_device *peer_device) { struct drbd_device *device = req->device; struct bio_and_error m; int rv; /* __req_mod possibly frees req, do not touch req after that! */ - rv = __req_mod(req, what, &m); + rv = __req_mod(req, what, peer_device, &m); if (m.bio) complete_master_bio(device, &m); @@ -299,7 +301,8 @@ static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what) * of the lower level driver completion callback, so we need to * spin_lock_irqsave here. */ static inline int req_mod(struct drbd_request *req, - enum drbd_req_event what) + enum drbd_req_event what, + struct drbd_peer_device *peer_device) { unsigned long flags; struct drbd_device *device = req->device; @@ -307,7 +310,7 @@ static inline int req_mod(struct drbd_request *req, int rv; spin_lock_irqsave(&device->resource->req_lock, flags); - rv = __req_mod(req, what, &m); + rv = __req_mod(req, what, peer_device, &m); spin_unlock_irqrestore(&device->resource->req_lock, flags); if (m.bio) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 2aeea295fa28..287a8d1d3f70 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1222,9 +1222,11 @@ void drbd_resume_al(struct drbd_device *device) } /* helper for _drbd_set_state */ -static void set_ov_position(struct drbd_device *device, enum drbd_conns cs) +static void set_ov_position(struct drbd_peer_device *peer_device, enum drbd_conns cs) { - if (first_peer_device(device)->connection->agreed_pro_version < 90) + struct drbd_device *device = peer_device->device; + + if (peer_device->connection->agreed_pro_version < 90) device->ov_start_sector = 0; device->rs_total = drbd_bm_bits(device); device->ov_position = 0; @@ -1387,7 +1389,7 @@ _drbd_set_state(struct drbd_device *device, union drbd_state ns, unsigned long now = jiffies; int i; - set_ov_position(device, ns.conn); + set_ov_position(peer_device, ns.conn); device->rs_start = now; device->rs_last_sect_ev = 0; device->ov_last_oos_size = 0; @@ -1398,7 +1400,7 @@ _drbd_set_state(struct drbd_device *device, union drbd_state ns, device->rs_mark_time[i] = now; } - drbd_rs_controller_reset(device); + drbd_rs_controller_reset(peer_device); if (ns.conn == C_VERIFY_S) { drbd_info(device, "Starting Online Verify from sector %llu\n", @@ -1518,8 +1520,9 @@ static void abw_start_sync(struct drbd_device *device, int rv) } int drbd_bitmap_io_from_worker(struct drbd_device *device, - int (*io_fn)(struct drbd_device *), - char *why, enum bm_flag flags) + int (*io_fn)(struct drbd_device *, struct drbd_peer_device *), + char *why, enum bm_flag flags, + struct drbd_peer_device *peer_device) { int rv; @@ -1529,7 +1532,7 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device, atomic_inc(&device->suspend_cnt); drbd_bm_lock(device, why, flags); - rv = io_fn(device); + rv = io_fn(device, peer_device); drbd_bm_unlock(device); drbd_resume_io(device); @@ -1809,7 +1812,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, device->state.conn == C_WF_BITMAP_S) drbd_queue_bitmap_io(device, &drbd_send_bitmap, NULL, "send_bitmap (WFBitMapS)", - BM_LOCKED_TEST_ALLOWED); + BM_LOCKED_TEST_ALLOWED, peer_device); /* Lost contact to peer's copy of the data */ if (lost_contact_to_peer_data(os.pdsk, ns.pdsk)) { @@ -1839,7 +1842,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, * No harm done if the bitmap still changes, * redirtied pages will follow later. */ drbd_bitmap_io_from_worker(device, &drbd_bm_write, - "demote diskless peer", BM_LOCKED_SET_ALLOWED); + "demote diskless peer", BM_LOCKED_SET_ALLOWED, peer_device); put_ldev(device); } @@ -1851,7 +1854,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, /* No changes to the bitmap expected this time, so assert that, * even though no harm was done if it did change. */ drbd_bitmap_io_from_worker(device, &drbd_bm_write, - "demote", BM_LOCKED_TEST_ALLOWED); + "demote", BM_LOCKED_TEST_ALLOWED, peer_device); put_ldev(device); } @@ -1888,7 +1891,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, /* no other bitmap changes expected during this phase */ drbd_queue_bitmap_io(device, &drbd_bmio_set_n_write, &abw_start_sync, - "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED); + "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED, + peer_device); /* first half of local IO error, failure to attach, * or administrative detach */ @@ -2011,7 +2015,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, if ((os.conn > C_CONNECTED && os.conn < C_AHEAD) && (ns.conn == C_CONNECTED || ns.conn >= C_AHEAD) && get_ldev(device)) { drbd_queue_bitmap_io(device, &drbd_bm_write_copy_pages, NULL, - "write from resync_finished", BM_LOCKED_CHANGE_ALLOWED); + "write from resync_finished", BM_LOCKED_CHANGE_ALLOWED, + peer_device); put_ldev(device); } diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index f46738040d6b..4352a50fbb3f 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -28,8 +28,8 @@ #include "drbd_protocol.h" #include "drbd_req.h" -static int make_ov_request(struct drbd_device *, int); -static int make_resync_request(struct drbd_device *, int); +static int make_ov_request(struct drbd_peer_device *, int); +static int make_resync_request(struct drbd_peer_device *, int); /* endio handlers: * drbd_md_endio (defined here) @@ -124,7 +124,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l * In case of a write error, send the neg ack anyways. */ if (!__test_and_set_bit(__EE_SEND_WRITE_ACK, &peer_req->flags)) inc_unacked(device); - drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size); + drbd_set_out_of_sync(peer_device, peer_req->i.sector, peer_req->i.size); } spin_lock_irqsave(&device->resource->req_lock, flags); @@ -276,7 +276,7 @@ void drbd_request_endio(struct bio *bio) /* not req_mod(), we need irqsave here! */ spin_lock_irqsave(&device->resource->req_lock, flags); - __req_mod(req, what, &m); + __req_mod(req, what, NULL, &m); spin_unlock_irqrestore(&device->resource->req_lock, flags); put_ldev(device); @@ -363,7 +363,7 @@ static int w_e_send_csum(struct drbd_work *w, int cancel) * drbd_alloc_pages due to pp_in_use > max_buffers. */ drbd_free_peer_req(device, peer_req); peer_req = NULL; - inc_rs_pending(device); + inc_rs_pending(peer_device); err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_CSUM_RS_REQUEST); @@ -430,10 +430,10 @@ int w_resync_timer(struct drbd_work *w, int cancel) switch (device->state.conn) { case C_VERIFY_S: - make_ov_request(device, cancel); + make_ov_request(first_peer_device(device), cancel); break; case C_SYNC_TARGET: - make_resync_request(device, cancel); + make_resync_request(first_peer_device(device), cancel); break; } @@ -493,8 +493,9 @@ struct fifo_buffer *fifo_alloc(unsigned int fifo_size) return fb; } -static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in) +static int drbd_rs_controller(struct drbd_peer_device *peer_device, unsigned int sect_in) { + struct drbd_device *device = peer_device->device; struct disk_conf *dc; unsigned int want; /* The number of sectors we want in-flight */ int req_sect; /* Number of sectors to request in this turn */ @@ -545,8 +546,9 @@ static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in) return req_sect; } -static int drbd_rs_number_requests(struct drbd_device *device) +static int drbd_rs_number_requests(struct drbd_peer_device *peer_device) { + struct drbd_device *device = peer_device->device; unsigned int sect_in; /* Number of sectors that came in since the last turn */ int number, mxb; @@ -556,7 +558,7 @@ static int drbd_rs_number_requests(struct drbd_device *device) rcu_read_lock(); mxb = drbd_get_max_buffers(device) / 2; if (rcu_dereference(device->rs_plan_s)->size) { - number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9); + number = drbd_rs_controller(peer_device, sect_in) >> (BM_BLOCK_SHIFT - 9); device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; } else { device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate; @@ -580,9 +582,9 @@ static int drbd_rs_number_requests(struct drbd_device *device) return number; } -static int make_resync_request(struct drbd_device *const device, int cancel) +static int make_resync_request(struct drbd_peer_device *const peer_device, int cancel) { - struct drbd_peer_device *const peer_device = first_peer_device(device); + struct drbd_device *const device = peer_device->device; struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL; unsigned long bit; sector_t sector; @@ -598,7 +600,7 @@ static int make_resync_request(struct drbd_device *const device, int cancel) if (device->rs_total == 0) { /* empty resync? */ - drbd_resync_finished(device); + drbd_resync_finished(peer_device); return 0; } @@ -618,7 +620,7 @@ static int make_resync_request(struct drbd_device *const device, int cancel) } max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9; - number = drbd_rs_number_requests(device); + number = drbd_rs_number_requests(peer_device); if (number <= 0) goto requeue; @@ -653,7 +655,7 @@ next_sector: sector = BM_BIT_TO_SECT(bit); - if (drbd_try_rs_begin_io(device, sector)) { + if (drbd_try_rs_begin_io(peer_device, sector)) { device->bm_resync_fo = bit; goto requeue; } @@ -729,13 +731,13 @@ next_sector: } else { int err; - inc_rs_pending(device); + inc_rs_pending(peer_device); err = drbd_send_drequest(peer_device, size == discard_granularity ? P_RS_THIN_REQ : P_RS_DATA_REQUEST, sector, size, ID_SYNCER); if (err) { drbd_err(device, "drbd_send_drequest() failed, aborting...\n"); - dec_rs_pending(device); + dec_rs_pending(peer_device); put_ldev(device); return err; } @@ -760,8 +762,9 @@ next_sector: return 0; } -static int make_ov_request(struct drbd_device *device, int cancel) +static int make_ov_request(struct drbd_peer_device *peer_device, int cancel) { + struct drbd_device *device = peer_device->device; int number, i, size; sector_t sector; const sector_t capacity = get_capacity(device->vdisk); @@ -770,7 +773,7 @@ static int make_ov_request(struct drbd_device *device, int cancel) if (unlikely(cancel)) return 1; - number = drbd_rs_number_requests(device); + number = drbd_rs_number_requests(peer_device); sector = device->ov_position; for (i = 0; i < number; i++) { @@ -788,7 +791,7 @@ static int make_ov_request(struct drbd_device *device, int cancel) size = BM_BLOCK_SIZE; - if (drbd_try_rs_begin_io(device, sector)) { + if (drbd_try_rs_begin_io(peer_device, sector)) { device->ov_position = sector; goto requeue; } @@ -796,9 +799,9 @@ static int make_ov_request(struct drbd_device *device, int cancel) if (sector + (size>>9) > capacity) size = (capacity-sector)<<9; - inc_rs_pending(device); + inc_rs_pending(peer_device); if (drbd_send_ov_request(first_peer_device(device), sector, size)) { - dec_rs_pending(device); + dec_rs_pending(peer_device); return 0; } sector += BM_SECT_PER_BIT; @@ -818,8 +821,8 @@ int w_ov_finished(struct drbd_work *w, int cancel) container_of(w, struct drbd_device_work, w); struct drbd_device *device = dw->device; kfree(dw); - ov_out_of_sync_print(device); - drbd_resync_finished(device); + ov_out_of_sync_print(first_peer_device(device)); + drbd_resync_finished(first_peer_device(device)); return 0; } @@ -831,7 +834,7 @@ static int w_resync_finished(struct drbd_work *w, int cancel) struct drbd_device *device = dw->device; kfree(dw); - drbd_resync_finished(device); + drbd_resync_finished(first_peer_device(device)); return 0; } @@ -846,9 +849,10 @@ static void ping_peer(struct drbd_device *device) test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED); } -int drbd_resync_finished(struct drbd_device *device) +int drbd_resync_finished(struct drbd_peer_device *peer_device) { - struct drbd_connection *connection = first_peer_device(device)->connection; + struct drbd_device *device = peer_device->device; + struct drbd_connection *connection = peer_device->connection; unsigned long db, dt, dbdt; unsigned long n_oos; union drbd_state os, ns; @@ -1129,7 +1133,7 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel) err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req); } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { if (likely(device->state.pdsk >= D_INCONSISTENT)) { - inc_rs_pending(device); + inc_rs_pending(peer_device); if (peer_req->flags & EE_RS_THIN_REQ && all_zero(peer_req)) err = drbd_send_rs_deallocated(peer_device, peer_req); else @@ -1148,7 +1152,7 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel) err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req); /* update resync data with failure */ - drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size); + drbd_rs_failed_io(peer_device, peer_req->i.sector, peer_req->i.size); } dec_unacked(device); @@ -1199,12 +1203,12 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) } if (eq) { - drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size); + drbd_set_in_sync(peer_device, peer_req->i.sector, peer_req->i.size); /* rs_same_csums unit is BM_BLOCK_SIZE */ device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT; err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req); } else { - inc_rs_pending(device); + inc_rs_pending(peer_device); peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */ peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */ kfree(di); @@ -1257,10 +1261,10 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel) * drbd_alloc_pages due to pp_in_use > max_buffers. */ drbd_free_peer_req(device, peer_req); peer_req = NULL; - inc_rs_pending(device); + inc_rs_pending(peer_device); err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY); if (err) - dec_rs_pending(device); + dec_rs_pending(peer_device); kfree(digest); out: @@ -1270,15 +1274,16 @@ out: return err; } -void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size) +void drbd_ov_out_of_sync_found(struct drbd_peer_device *peer_device, sector_t sector, int size) { + struct drbd_device *device = peer_device->device; if (device->ov_last_oos_start + device->ov_last_oos_size == sector) { device->ov_last_oos_size += size>>9; } else { device->ov_last_oos_start = sector; device->ov_last_oos_size = size>>9; } - drbd_set_out_of_sync(device, sector, size); + drbd_set_out_of_sync(peer_device, sector, size); } int w_e_end_ov_reply(struct drbd_work *w, int cancel) @@ -1328,9 +1333,9 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) * drbd_alloc_pages due to pp_in_use > max_buffers. */ drbd_free_peer_req(device, peer_req); if (!eq) - drbd_ov_out_of_sync_found(device, sector, size); + drbd_ov_out_of_sync_found(peer_device, sector, size); else - ov_out_of_sync_print(device); + ov_out_of_sync_print(peer_device); err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, eq ? ID_IN_SYNC : ID_OUT_OF_SYNC); @@ -1341,14 +1346,14 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel) /* let's advance progress step marks only for every other megabyte */ if ((device->ov_left & 0x200) == 0x200) - drbd_advance_rs_marks(device, device->ov_left); + drbd_advance_rs_marks(peer_device, device->ov_left); stop_sector_reached = verify_can_do_stop_sector(device) && (sector + (size>>9)) >= device->ov_stop_sector; if (device->ov_left == 0 || stop_sector_reached) { - ov_out_of_sync_print(device); - drbd_resync_finished(device); + ov_out_of_sync_print(peer_device); + drbd_resync_finished(peer_device); } return err; @@ -1425,7 +1430,7 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel) int err; if (unlikely(cancel)) { - req_mod(req, SEND_CANCELED); + req_mod(req, SEND_CANCELED, peer_device); return 0; } req->pre_send_jif = jiffies; @@ -1437,7 +1442,7 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel) maybe_send_barrier(connection, req->epoch); err = drbd_send_out_of_sync(peer_device, req); - req_mod(req, OOS_HANDED_TO_NETWORK); + req_mod(req, OOS_HANDED_TO_NETWORK, peer_device); return err; } @@ -1457,7 +1462,7 @@ int w_send_dblock(struct drbd_work *w, int cancel) int err; if (unlikely(cancel)) { - req_mod(req, SEND_CANCELED); + req_mod(req, SEND_CANCELED, peer_device); return 0; } req->pre_send_jif = jiffies; @@ -1467,7 +1472,7 @@ int w_send_dblock(struct drbd_work *w, int cancel) connection->send.current_epoch_writes++; err = drbd_send_dblock(peer_device, req); - req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); + req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK, peer_device); if (do_send_unplug && !err) pd_send_unplug_remote(peer_device); @@ -1490,7 +1495,7 @@ int w_send_read_req(struct drbd_work *w, int cancel) int err; if (unlikely(cancel)) { - req_mod(req, SEND_CANCELED); + req_mod(req, SEND_CANCELED, peer_device); return 0; } req->pre_send_jif = jiffies; @@ -1502,7 +1507,7 @@ int w_send_read_req(struct drbd_work *w, int cancel) err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size, (unsigned long)req); - req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); + req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK, peer_device); if (do_send_unplug && !err) pd_send_unplug_remote(peer_device); @@ -1668,8 +1673,9 @@ void drbd_resync_after_changed(struct drbd_device *device) } while (changed); } -void drbd_rs_controller_reset(struct drbd_device *device) +void drbd_rs_controller_reset(struct drbd_peer_device *peer_device) { + struct drbd_device *device = peer_device->device; struct gendisk *disk = device->ldev->backing_bdev->bd_disk; struct fifo_buffer *plan; @@ -1891,10 +1897,10 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) rcu_read_unlock(); schedule_timeout_interruptible(timeo); } - drbd_resync_finished(device); + drbd_resync_finished(peer_device); } - drbd_rs_controller_reset(device); + drbd_rs_controller_reset(peer_device); /* ns.conn may already be != device->state.conn, * we may have been paused in between, or become paused until * the timer triggers. @@ -1909,8 +1915,9 @@ out: mutex_unlock(device->state_mutex); } -static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done) +static void update_on_disk_bitmap(struct drbd_peer_device *peer_device, bool resync_done) { + struct drbd_device *device = peer_device->device; struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, }; device->rs_last_bcast = jiffies; @@ -1919,7 +1926,7 @@ static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done) drbd_bm_write_lazy(device, 0); if (resync_done && is_sync_state(device->state.conn)) - drbd_resync_finished(device); + drbd_resync_finished(peer_device); drbd_bcast_event(device, &sib); /* update timestamp, in case it took a while to write out stuff */ @@ -1945,6 +1952,7 @@ static void drbd_ldev_destroy(struct drbd_device *device) static void go_diskless(struct drbd_device *device) { + struct drbd_peer_device *peer_device = first_peer_device(device); D_ASSERT(device, device->state.disk == D_FAILED); /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will * inc/dec it frequently. Once we are D_DISKLESS, no one will touch @@ -1970,7 +1978,7 @@ static void go_diskless(struct drbd_device *device) * Any modifications would not be expected anymore, though. */ if (drbd_bitmap_io_from_worker(device, drbd_bm_write, - "detach", BM_LOCKED_TEST_ALLOWED)) { + "detach", BM_LOCKED_TEST_ALLOWED, peer_device)) { if (test_bit(WAS_READ_ERROR, &device->flags)) { drbd_md_set_flag(device, MDF_FULL_SYNC); drbd_md_sync(device); @@ -2017,7 +2025,7 @@ static void do_device_work(struct drbd_device *device, const unsigned long todo) do_md_sync(device); if (test_bit(RS_DONE, &todo) || test_bit(RS_PROGRESS, &todo)) - update_on_disk_bitmap(device, test_bit(RS_DONE, &todo)); + update_on_disk_bitmap(first_peer_device(device), test_bit(RS_DONE, &todo)); if (test_bit(GO_DISKLESS, &todo)) go_diskless(device); if (test_bit(DESTROY_DISK, &todo)) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 592cfa8b765a..d445fd0934bd 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -325,6 +325,9 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, if (blk_validate_block_size(blksize)) return -EINVAL; + if (bytesize < 0) + return -EINVAL; + nbd->config->bytesize = bytesize; nbd->config->blksize_bits = __ffs(blksize); @@ -1111,6 +1114,9 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, struct nbd_sock *nsock; int err; + /* Arg will be cast to int, check it to avoid overflow */ + if (arg > INT_MAX) + return -EINVAL; sock = nbd_get_socket(nbd, arg, &err); if (!sock) return err; @@ -1934,11 +1940,11 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } } - if (!info->attrs[NBD_ATTR_SOCKETS]) { + if (GENL_REQ_ATTR_CHECK(info, NBD_ATTR_SOCKETS)) { pr_err("must specify at least one socket\n"); return -EINVAL; } - if (!info->attrs[NBD_ATTR_SIZE_BYTES]) { + if (GENL_REQ_ATTR_CHECK(info, NBD_ATTR_SIZE_BYTES)) { pr_err("must specify a size in bytes for the device\n"); return -EINVAL; } @@ -2123,7 +2129,7 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) if (!netlink_capable(skb, CAP_SYS_ADMIN)) return -EPERM; - if (!info->attrs[NBD_ATTR_INDEX]) { + if (GENL_REQ_ATTR_CHECK(info, NBD_ATTR_INDEX)) { pr_err("must specify an index to disconnect\n"); return -EINVAL; } @@ -2161,7 +2167,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) if (!netlink_capable(skb, CAP_SYS_ADMIN)) return -EPERM; - if (!info->attrs[NBD_ATTR_INDEX]) { + if (GENL_REQ_ATTR_CHECK(info, NBD_ATTR_INDEX)) { pr_err("must specify a device to reconfigure\n"); return -EINVAL; } @@ -2325,6 +2331,7 @@ static struct genl_family nbd_genl_family __ro_after_init = { .n_small_ops = ARRAY_SIZE(nbd_connect_genl_ops), .resv_start_op = NBD_CMD_STATUS + 1, .maxattr = NBD_ATTR_MAX, + .netnsok = 1, .policy = nbd_attr_policy, .mcgrps = nbd_mcast_grps, .n_mcgrps = ARRAY_SIZE(nbd_mcast_grps), diff --git a/drivers/block/null_blk/Kconfig b/drivers/block/null_blk/Kconfig index 6bf1f8ca20a2..ff23bb9346d0 100644 --- a/drivers/block/null_blk/Kconfig +++ b/drivers/block/null_blk/Kconfig @@ -9,4 +9,4 @@ config BLK_DEV_NULL_BLK config BLK_DEV_NULL_BLK_FAULT_INJECTION bool "Support fault injection for Null test block driver" - depends on BLK_DEV_NULL_BLK && FAULT_INJECTION + depends on BLK_DEV_NULL_BLK && FAULT_INJECTION_CONFIGFS diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 9e6b032c8ecc..b195b8b9fe32 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -250,7 +250,7 @@ static void null_free_device_storage(struct nullb_device *dev, bool is_cache); static inline struct nullb_device *to_nullb_device(struct config_item *item) { - return item ? container_of(item, struct nullb_device, item) : NULL; + return item ? container_of(to_config_group(item), struct nullb_device, group) : NULL; } static inline ssize_t nullb_device_uint_attr_show(unsigned int val, char *page) @@ -593,8 +593,29 @@ static const struct config_item_type nullb_device_type = { .ct_owner = THIS_MODULE, }; +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION + +static void nullb_add_fault_config(struct nullb_device *dev) +{ + fault_config_init(&dev->timeout_config, "timeout_inject"); + fault_config_init(&dev->requeue_config, "requeue_inject"); + fault_config_init(&dev->init_hctx_fault_config, "init_hctx_fault_inject"); + + configfs_add_default_group(&dev->timeout_config.group, &dev->group); + configfs_add_default_group(&dev->requeue_config.group, &dev->group); + configfs_add_default_group(&dev->init_hctx_fault_config.group, &dev->group); +} + +#else + +static void nullb_add_fault_config(struct nullb_device *dev) +{ +} + +#endif + static struct -config_item *nullb_group_make_item(struct config_group *group, const char *name) +config_group *nullb_group_make_group(struct config_group *group, const char *name) { struct nullb_device *dev; @@ -605,9 +626,10 @@ config_item *nullb_group_make_item(struct config_group *group, const char *name) if (!dev) return ERR_PTR(-ENOMEM); - config_item_init_type_name(&dev->item, name, &nullb_device_type); + config_group_init_type_name(&dev->group, name, &nullb_device_type); + nullb_add_fault_config(dev); - return &dev->item; + return &dev->group; } static void @@ -645,7 +667,7 @@ static struct configfs_attribute *nullb_group_attrs[] = { }; static struct configfs_group_operations nullb_group_ops = { - .make_item = nullb_group_make_item, + .make_group = nullb_group_make_group, .drop_item = nullb_group_drop_item, }; @@ -676,6 +698,13 @@ static struct nullb_device *null_alloc_dev(void) dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return NULL; + +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION + dev->timeout_config.attr = null_timeout_attr; + dev->requeue_config.attr = null_requeue_attr; + dev->init_hctx_fault_config.attr = null_init_hctx_attr; +#endif + INIT_RADIX_TREE(&dev->data, GFP_ATOMIC); INIT_RADIX_TREE(&dev->cache, GFP_ATOMIC); if (badblocks_init(&dev->badblocks, 0)) { @@ -1030,8 +1059,8 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page) if (!t_page) return -ENOMEM; - src = kmap_atomic(c_page->page); - dst = kmap_atomic(t_page->page); + src = kmap_local_page(c_page->page); + dst = kmap_local_page(t_page->page); for (i = 0; i < PAGE_SECTORS; i += (nullb->dev->blocksize >> SECTOR_SHIFT)) { @@ -1043,8 +1072,8 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page) } } - kunmap_atomic(dst); - kunmap_atomic(src); + kunmap_local(dst); + kunmap_local(src); ret = radix_tree_delete_item(&nullb->dev->cache, idx, c_page); null_free_page(ret); @@ -1112,7 +1141,6 @@ static int copy_to_nullb(struct nullb *nullb, struct page *source, size_t temp, count = 0; unsigned int offset; struct nullb_page *t_page; - void *dst, *src; while (count < n) { temp = min_t(size_t, nullb->dev->blocksize, n - count); @@ -1126,11 +1154,7 @@ static int copy_to_nullb(struct nullb *nullb, struct page *source, if (!t_page) return -ENOSPC; - src = kmap_atomic(source); - dst = kmap_atomic(t_page->page); - memcpy(dst + offset, src + off + count, temp); - kunmap_atomic(dst); - kunmap_atomic(src); + memcpy_page(t_page->page, offset, source, off + count, temp); __set_bit(sector & SECTOR_MASK, t_page->bitmap); @@ -1149,7 +1173,6 @@ static int copy_from_nullb(struct nullb *nullb, struct page *dest, size_t temp, count = 0; unsigned int offset; struct nullb_page *t_page; - void *dst, *src; while (count < n) { temp = min_t(size_t, nullb->dev->blocksize, n - count); @@ -1158,16 +1181,11 @@ static int copy_from_nullb(struct nullb *nullb, struct page *dest, t_page = null_lookup_page(nullb, sector, false, !null_cache_active(nullb)); - dst = kmap_atomic(dest); - if (!t_page) { - memset(dst + off + count, 0, temp); - goto next; - } - src = kmap_atomic(t_page->page); - memcpy(dst + off + count, src + offset, temp); - kunmap_atomic(src); -next: - kunmap_atomic(dst); + if (t_page) + memcpy_page(dest, off + count, t_page->page, offset, + temp); + else + zero_user(dest, off + count, temp); count += temp; sector += temp >> SECTOR_SHIFT; @@ -1178,11 +1196,7 @@ next: static void nullb_fill_pattern(struct nullb *nullb, struct page *page, unsigned int len, unsigned int off) { - void *dst; - - dst = kmap_atomic(page); - memset(dst + off, 0xFF, len); - kunmap_atomic(dst); + memset_page(page, off, 0xff, len); } blk_status_t null_handle_discard(struct nullb_device *dev, @@ -1529,24 +1543,48 @@ static void null_submit_bio(struct bio *bio) null_handle_cmd(alloc_cmd(nq, bio), sector, nr_sectors, bio_op(bio)); } +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION + +static bool should_timeout_request(struct request *rq) +{ + struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq); + struct nullb_device *dev = cmd->nq->dev; + + return should_fail(&dev->timeout_config.attr, 1); +} + +static bool should_requeue_request(struct request *rq) +{ + struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq); + struct nullb_device *dev = cmd->nq->dev; + + return should_fail(&dev->requeue_config.attr, 1); +} + +static bool should_init_hctx_fail(struct nullb_device *dev) +{ + return should_fail(&dev->init_hctx_fault_config.attr, 1); +} + +#else + static bool should_timeout_request(struct request *rq) { -#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION - if (g_timeout_str[0]) - return should_fail(&null_timeout_attr, 1); -#endif return false; } static bool should_requeue_request(struct request *rq) { -#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION - if (g_requeue_str[0]) - return should_fail(&null_requeue_attr, 1); -#endif return false; } +static bool should_init_hctx_fail(struct nullb_device *dev) +{ + return false; +} + +#endif + static void null_map_queues(struct blk_mq_tag_set *set) { struct nullb *nullb = set->driver_data; @@ -1743,10 +1781,8 @@ static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, struct nullb *nullb = hctx->queue->queuedata; struct nullb_queue *nq; -#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION - if (g_init_hctx_str[0] && should_fail(&null_init_hctx_attr, 1)) + if (should_init_hctx_fail(nullb->dev)) return -EFAULT; -#endif nq = &nullb->queues[hctx_idx]; hctx->driver_data = nq; @@ -1964,6 +2000,11 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set) static int null_validate_conf(struct nullb_device *dev) { + if (dev->queue_mode == NULL_Q_RQ) { + pr_err("legacy IO path is no longer available\n"); + return -EINVAL; + } + dev->blocksize = round_down(dev->blocksize, 512); dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096); @@ -2066,9 +2107,6 @@ static int null_add_dev(struct nullb_device *dev) if (rv) goto out_cleanup_queues; - if (!null_setup_fault()) - goto out_cleanup_tags; - nullb->tag_set->timeout = 5 * HZ; nullb->disk = blk_mq_alloc_disk(nullb->tag_set, nullb); if (IS_ERR(nullb->disk)) { @@ -2130,10 +2168,10 @@ static int null_add_dev(struct nullb_device *dev) null_config_discard(nullb); - if (config_item_name(&dev->item)) { + if (config_item_name(&dev->group.cg_item)) { /* Use configfs dir name as the device name */ snprintf(nullb->disk_name, sizeof(nullb->disk_name), - "%s", config_item_name(&dev->item)); + "%s", config_item_name(&dev->group.cg_item)); } else { sprintf(nullb->disk_name, "nullb%d", nullb->index); } @@ -2233,6 +2271,9 @@ static int __init null_init(void) g_home_node = NUMA_NO_NODE; } + if (!null_setup_fault()) + return -EINVAL; + if (g_queue_mode == NULL_Q_RQ) { pr_err("legacy IO path is no longer available\n"); return -EINVAL; diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h index eb5972c50be8..929f659dd255 100644 --- a/drivers/block/null_blk/null_blk.h +++ b/drivers/block/null_blk/null_blk.h @@ -69,7 +69,12 @@ enum { struct nullb_device { struct nullb *nullb; - struct config_item item; + struct config_group group; +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION + struct fault_config timeout_config; + struct fault_config requeue_config; + struct fault_config init_hctx_fault_config; +#endif struct radix_tree_root data; /* data stored in the disk */ struct radix_tree_root cache; /* disk cache data */ unsigned long flags; /* device flags */ diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 604c1a13c76e..afbef182820b 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -53,7 +53,8 @@ | UBLK_F_NEED_GET_DATA \ | UBLK_F_USER_RECOVERY \ | UBLK_F_USER_RECOVERY_REISSUE \ - | UBLK_F_UNPRIVILEGED_DEV) + | UBLK_F_UNPRIVILEGED_DEV \ + | UBLK_F_CMD_IOCTL_ENCODE) /* All UBLK_PARAM_TYPE_* should be included here */ #define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | \ @@ -298,9 +299,7 @@ static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq) static inline bool ublk_need_get_data(const struct ublk_queue *ubq) { - if (ubq->flags & UBLK_F_NEED_GET_DATA) - return true; - return false; + return ubq->flags & UBLK_F_NEED_GET_DATA; } static struct ublk_device *ublk_get_device(struct ublk_device *ub) @@ -349,25 +348,19 @@ static inline int ublk_queue_cmd_buf_size(struct ublk_device *ub, int q_id) static inline bool ublk_queue_can_use_recovery_reissue( struct ublk_queue *ubq) { - if ((ubq->flags & UBLK_F_USER_RECOVERY) && - (ubq->flags & UBLK_F_USER_RECOVERY_REISSUE)) - return true; - return false; + return (ubq->flags & UBLK_F_USER_RECOVERY) && + (ubq->flags & UBLK_F_USER_RECOVERY_REISSUE); } static inline bool ublk_queue_can_use_recovery( struct ublk_queue *ubq) { - if (ubq->flags & UBLK_F_USER_RECOVERY) - return true; - return false; + return ubq->flags & UBLK_F_USER_RECOVERY; } static inline bool ublk_can_use_recovery(struct ublk_device *ub) { - if (ub->dev_info.flags & UBLK_F_USER_RECOVERY) - return true; - return false; + return ub->dev_info.flags & UBLK_F_USER_RECOVERY; } static void ublk_free_disk(struct gendisk *disk) @@ -428,10 +421,9 @@ static const struct block_device_operations ub_fops = { #define UBLK_MAX_PIN_PAGES 32 struct ublk_map_data { - const struct ublk_queue *ubq; const struct request *rq; - const struct ublk_io *io; - unsigned max_bytes; + unsigned long ubuf; + unsigned int len; }; struct ublk_io_iter { @@ -488,18 +480,17 @@ static inline unsigned ublk_copy_io_pages(struct ublk_io_iter *data, return done; } -static inline int ublk_copy_user_pages(struct ublk_map_data *data, - bool to_vm) +static int ublk_copy_user_pages(struct ublk_map_data *data, bool to_vm) { const unsigned int gup_flags = to_vm ? FOLL_WRITE : 0; - const unsigned long start_vm = data->io->addr; + const unsigned long start_vm = data->ubuf; unsigned int done = 0; struct ublk_io_iter iter = { .pg_off = start_vm & (PAGE_SIZE - 1), .bio = data->rq->bio, .iter = data->rq->bio->bi_iter, }; - const unsigned int nr_pages = round_up(data->max_bytes + + const unsigned int nr_pages = round_up(data->len + (start_vm & (PAGE_SIZE - 1)), PAGE_SIZE) >> PAGE_SHIFT; while (done < nr_pages) { @@ -512,42 +503,49 @@ static inline int ublk_copy_user_pages(struct ublk_map_data *data, iter.pages); if (iter.nr_pages <= 0) return done == 0 ? iter.nr_pages : done; - len = ublk_copy_io_pages(&iter, data->max_bytes, to_vm); + len = ublk_copy_io_pages(&iter, data->len, to_vm); for (i = 0; i < iter.nr_pages; i++) { if (to_vm) set_page_dirty(iter.pages[i]); put_page(iter.pages[i]); } - data->max_bytes -= len; + data->len -= len; done += iter.nr_pages; } return done; } +static inline bool ublk_need_map_req(const struct request *req) +{ + return ublk_rq_has_data(req) && req_op(req) == REQ_OP_WRITE; +} + +static inline bool ublk_need_unmap_req(const struct request *req) +{ + return ublk_rq_has_data(req) && req_op(req) == REQ_OP_READ; +} + static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req, struct ublk_io *io) { const unsigned int rq_bytes = blk_rq_bytes(req); + /* * no zero copy, we delay copy WRITE request data into ublksrv * context and the big benefit is that pinning pages in current * context is pretty fast, see ublk_pin_user_pages */ - if (req_op(req) != REQ_OP_WRITE && req_op(req) != REQ_OP_FLUSH) - return rq_bytes; - - if (ublk_rq_has_data(req)) { + if (ublk_need_map_req(req)) { struct ublk_map_data data = { - .ubq = ubq, .rq = req, - .io = io, - .max_bytes = rq_bytes, + .ubuf = io->addr, + .len = rq_bytes, }; ublk_copy_user_pages(&data, true); - return rq_bytes - data.max_bytes; + return rq_bytes - data.len; } return rq_bytes; } @@ -558,19 +556,18 @@ static int ublk_unmap_io(const struct ublk_queue *ubq, { const unsigned int rq_bytes = blk_rq_bytes(req); - if (req_op(req) == REQ_OP_READ && ublk_rq_has_data(req)) { + if (ublk_need_unmap_req(req)) { struct ublk_map_data data = { - .ubq = ubq, .rq = req, - .io = io, - .max_bytes = io->res, + .ubuf = io->addr, + .len = io->res, }; WARN_ON_ONCE(io->res > rq_bytes); ublk_copy_user_pages(&data, false); - return io->res - data.max_bytes; + return io->res - data.len; } return rq_bytes; } @@ -655,14 +652,15 @@ static void ublk_complete_rq(struct request *req) struct ublk_queue *ubq = req->mq_hctx->driver_data; struct ublk_io *io = &ubq->ios[req->tag]; unsigned int unmapped_bytes; + blk_status_t res = BLK_STS_OK; /* failed read IO if nothing is read */ if (!io->res && req_op(req) == REQ_OP_READ) io->res = -EIO; if (io->res < 0) { - blk_mq_end_request(req, errno_to_blk_status(io->res)); - return; + res = errno_to_blk_status(io->res); + goto exit; } /* @@ -671,10 +669,8 @@ static void ublk_complete_rq(struct request *req) * * Both the two needn't unmap. */ - if (req_op(req) != REQ_OP_READ && req_op(req) != REQ_OP_WRITE) { - blk_mq_end_request(req, BLK_STS_OK); - return; - } + if (req_op(req) != REQ_OP_READ && req_op(req) != REQ_OP_WRITE) + goto exit; /* for READ request, writing data in iod->addr to rq buffers */ unmapped_bytes = ublk_unmap_io(ubq, req, io); @@ -691,6 +687,10 @@ static void ublk_complete_rq(struct request *req) blk_mq_requeue_request(req, true); else __blk_mq_end_request(req, BLK_STS_OK); + + return; +exit: + blk_mq_end_request(req, res); } /* @@ -771,9 +771,7 @@ static inline void __ublk_rq_task_work(struct request *req, return; } - if (ublk_need_get_data(ubq) && - (req_op(req) == REQ_OP_WRITE || - req_op(req) == REQ_OP_FLUSH)) { + if (ublk_need_get_data(ubq) && ublk_need_map_req(req)) { /* * We have not handled UBLK_IO_NEED_GET_DATA command yet, * so immepdately pass UBLK_IO_RES_NEED_GET_DATA to ublksrv @@ -1261,6 +1259,19 @@ static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id, ublk_queue_cmd(ubq, req); } +static inline int ublk_check_cmd_op(u32 cmd_op) +{ + u32 ioc_type = _IOC_TYPE(cmd_op); + + if (IS_ENABLED(CONFIG_BLKDEV_UBLK_LEGACY_OPCODES) && ioc_type != 'u') + return -EOPNOTSUPP; + + if (ioc_type != 'u' && ioc_type != 0) + return -EOPNOTSUPP; + + return 0; +} + static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags, struct ublksrv_io_cmd *ub_cmd) @@ -1303,10 +1314,15 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd, * iff the driver have set the UBLK_IO_FLAG_NEED_GET_DATA. */ if ((!!(io->flags & UBLK_IO_FLAG_NEED_GET_DATA)) - ^ (cmd_op == UBLK_IO_NEED_GET_DATA)) + ^ (_IOC_NR(cmd_op) == UBLK_IO_NEED_GET_DATA)) + goto out; + + ret = ublk_check_cmd_op(cmd_op); + if (ret) goto out; - switch (cmd_op) { + ret = -EINVAL; + switch (_IOC_NR(cmd_op)) { case UBLK_IO_FETCH_REQ: /* UBLK_IO_FETCH_REQ is only allowed before queue is setup */ if (ublk_queue_ready(ubq)) { @@ -1770,6 +1786,8 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) if (!IS_BUILTIN(CONFIG_BLK_DEV_UBLK)) ub->dev_info.flags |= UBLK_F_URING_CMD_COMP_IN_TASK; + ub->dev_info.flags |= UBLK_F_CMD_IOCTL_ENCODE; + /* We are not ready to support zero copy */ ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY; @@ -2128,7 +2146,7 @@ static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub, * know if the specified device is created as unprivileged * mode. */ - if (cmd->cmd_op != UBLK_CMD_GET_DEV_INFO2) + if (_IOC_NR(cmd->cmd_op) != UBLK_CMD_GET_DEV_INFO2) return 0; } @@ -2154,7 +2172,7 @@ static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub, dev_path[header->dev_path_len] = 0; ret = -EINVAL; - switch (cmd->cmd_op) { + switch (_IOC_NR(cmd->cmd_op)) { case UBLK_CMD_GET_DEV_INFO: case UBLK_CMD_GET_DEV_INFO2: case UBLK_CMD_GET_QUEUE_AFFINITY: @@ -2193,6 +2211,7 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, { struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; struct ublk_device *ub = NULL; + u32 cmd_op = cmd->cmd_op; int ret = -EINVAL; if (issue_flags & IO_URING_F_NONBLOCK) @@ -2203,22 +2222,22 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, if (!(issue_flags & IO_URING_F_SQE128)) goto out; - if (cmd->cmd_op != UBLK_CMD_ADD_DEV) { + ret = ublk_check_cmd_op(cmd_op); + if (ret) + goto out; + + if (_IOC_NR(cmd_op) != UBLK_CMD_ADD_DEV) { ret = -ENODEV; ub = ublk_get_device_from_id(header->dev_id); if (!ub) goto out; ret = ublk_ctrl_uring_cmd_permission(ub, cmd); - } else { - /* ADD_DEV permission check is done in command handler */ - ret = 0; + if (ret) + goto put_dev; } - if (ret) - goto put_dev; - - switch (cmd->cmd_op) { + switch (_IOC_NR(cmd_op)) { case UBLK_CMD_START_DEV: ret = ublk_ctrl_start_dev(ub, cmd); break; |