summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-04-26 12:52:58 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-04-26 12:52:58 -0700
commit9dd6956b38923dc1b7b349ca1eee3c0bb1f0163a (patch)
treec70bb7d65a50a51686378b6113a8663e0e60d9b8 /drivers
parent5b9a7bb72fddbc5247f56ede55d485fab7abdf92 (diff)
parent55793ea54d77719a071b1ccc05a05056e3b5e009 (diff)
downloadlwn-9dd6956b38923dc1b7b349ca1eee3c0bb1f0163a.tar.gz
lwn-9dd6956b38923dc1b7b349ca1eee3c0bb1f0163a.zip
Merge tag 'for-6.4/block-2023-04-21' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe: - drbd patches, bringing us closer to unifying the out-of-tree version and the in tree one (Andreas, Christoph) - support for auto-quiesce for the s390 dasd driver (Stefan) - MD pull request via Song: - md/bitmap: Optimal last page size (Jon Derrick) - Various raid10 fixes (Yu Kuai, Li Nan) - md: add error_handlers for raid0 and linear (Mariusz Tkaczyk) - NVMe pull request via Christoph: - Drop redundant pci_enable_pcie_error_reporting (Bjorn Helgaas) - Validate nvmet module parameters (Chaitanya Kulkarni) - Fence TCP socket on receive error (Chris Leech) - Fix async event trace event (Keith Busch) - Minor cleanups (Chaitanya Kulkarni, zhenwei pi) - Fix and cleanup nvmet Identify handling (Damien Le Moal, Christoph Hellwig) - Fix double blk_mq_complete_request race in the timeout handler (Lei Yin) - Fix irq locking in nvme-fcloop (Ming Lei) - Remove queue mapping helper for rdma devices (Sagi Grimberg) - use structured request attribute checks for nbd (Jakub) - fix blk-crypto race conditions between keyslot management (Eric) - add sed-opal support for reading read locking range attributes (Ondrej) - make fault injection configurable for null_blk (Akinobu) - clean up the request insertion API (Christoph) - clean up the queue running API (Christoph) - blkg config helper cleanups (Tejun) - lazy init support for blk-iolatency (Tejun) - various fixes and tweaks to ublk (Ming) - remove hybrid polling. It hasn't really been useful since we got async polled IO support, and these days we don't support sync polled IO at all (Keith) - misc fixes, cleanups, improvements (Zhong, Ondrej, Colin, Chengming, Chaitanya, me) * tag 'for-6.4/block-2023-04-21' of git://git.kernel.dk/linux: (118 commits) nbd: fix incomplete validation of ioctl arg ublk: don't return 0 in case of any failure sed-opal: geometry feature reporting command null_blk: Always check queue mode setting from configfs block: ublk: switch to ioctl command encoding blk-mq: fix the blk_mq_add_to_requeue_list call in blk_kick_flush block, bfq: Fix division by zero error on zero wsum fault-inject: fix build error when FAULT_INJECTION_CONFIGFS=y and CONFIGFS_FS=m block: store bdev->bd_disk->fops->submit_bio state in bdev block: re-arrange the struct block_device fields for better layout md/raid5: remove unused working_disks variable md/raid10: don't call bio_start_io_acct twice for bio which experienced read error md/raid10: fix memleak of md thread md/raid10: fix memleak for 'conf->bio_split' md/raid10: fix leak of 'r10bio->remaining' for recovery md/raid10: don't BUG_ON() in raise_barrier() md: fix soft lockup in status_resync md: add error_handlers for raid0 and linear md: Use optimal I/O size for last bitmap page md: Fix types in sb writer ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/block/Kconfig17
-rw-r--r--drivers/block/drbd/drbd_actlog.c13
-rw-r--r--drivers/block/drbd/drbd_bitmap.c13
-rw-r--r--drivers/block/drbd/drbd_int.h120
-rw-r--r--drivers/block/drbd/drbd_main.c72
-rw-r--r--drivers/block/drbd/drbd_nl.c19
-rw-r--r--drivers/block/drbd/drbd_receiver.c102
-rw-r--r--drivers/block/drbd/drbd_req.c30
-rw-r--r--drivers/block/drbd/drbd_req.h11
-rw-r--r--drivers/block/drbd/drbd_state.c29
-rw-r--r--drivers/block/drbd/drbd_worker.c114
-rw-r--r--drivers/block/nbd.c15
-rw-r--r--drivers/block/null_blk/Kconfig2
-rw-r--r--drivers/block/null_blk/main.c135
-rw-r--r--drivers/block/null_blk/null_blk.h7
-rw-r--r--drivers/block/ublk_drv.c133
-rw-r--r--drivers/md/dm-table.c19
-rw-r--r--drivers/md/md-bitmap.c143
-rw-r--r--drivers/md/md-linear.c14
-rw-r--r--drivers/md/md.c27
-rw-r--r--drivers/md/md.h10
-rw-r--r--drivers/md/raid0.c14
-rw-r--r--drivers/md/raid10.c102
-rw-r--r--drivers/md/raid5.c5
-rw-r--r--drivers/nvme/host/apple.c8
-rw-r--r--drivers/nvme/host/core.c9
-rw-r--r--drivers/nvme/host/pci.c6
-rw-r--r--drivers/nvme/host/rdma.c19
-rw-r--r--drivers/nvme/host/tcp.c3
-rw-r--r--drivers/nvme/host/trace.h15
-rw-r--r--drivers/nvme/target/admin-cmd.c81
-rw-r--r--drivers/nvme/target/fcloop.c48
-rw-r--r--drivers/nvme/target/nvmet.h12
-rw-r--r--drivers/nvme/target/tcp.c34
-rw-r--r--drivers/nvme/target/zns.c20
-rw-r--r--drivers/s390/block/dasd.c75
-rw-r--r--drivers/s390/block/dasd_devmap.c126
-rw-r--r--drivers/s390/block/dasd_eckd.c1
-rw-r--r--drivers/s390/block/dasd_eer.c1
-rw-r--r--drivers/s390/block/dasd_int.h32
40 files changed, 1008 insertions, 648 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index f79f20430ef7..5b9d4aaebb81 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -385,6 +385,23 @@ config BLK_DEV_UBLK
can handle batch more effectively, but task_work_add() isn't exported
for module, so ublk has to be built to kernel.
+config BLKDEV_UBLK_LEGACY_OPCODES
+ bool "Support legacy command opcode"
+ depends on BLK_DEV_UBLK
+ default y
+ help
+ ublk driver started to take plain command encoding, which turns out
+ one bad way. The traditional ioctl command opcode encodes more
+ info and basically defines each code uniquely, so opcode conflict
+ is avoided, and driver can handle wrong command easily, meantime it
+ may help security subsystem to audit io_uring command.
+
+ Say Y if your application still uses legacy command opcode.
+
+ Say N if you don't want to support legacy command opcode. It is
+ suggested to enable N if your application(ublk server) switches to
+ ioctl command encoding.
+
source "drivers/block/rnbd/Kconfig"
endif # BLK_DEV
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 429255876800..64b3a1c76f03 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -735,8 +735,9 @@ static bool update_rs_extent(struct drbd_device *device,
return false;
}
-void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go)
+void drbd_advance_rs_marks(struct drbd_peer_device *peer_device, unsigned long still_to_go)
{
+ struct drbd_device *device = peer_device->device;
unsigned long now = jiffies;
unsigned long last = device->rs_mark_time[device->rs_last_mark];
int next = (device->rs_last_mark + 1) % DRBD_SYNC_MARKS;
@@ -819,7 +820,7 @@ static int update_sync_bits(struct drbd_device *device,
if (mode == SET_IN_SYNC) {
unsigned long still_to_go = drbd_bm_total_weight(device);
bool rs_is_done = (still_to_go <= device->rs_failed);
- drbd_advance_rs_marks(device, still_to_go);
+ drbd_advance_rs_marks(first_peer_device(device), still_to_go);
if (cleared || rs_is_done)
maybe_schedule_on_disk_bitmap_update(device, rs_is_done);
} else if (mode == RECORD_RS_FAILED)
@@ -843,10 +844,11 @@ static bool plausible_request_size(int size)
* called by worker on C_SYNC_TARGET and receiver on SyncSource.
*
*/
-int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
+int __drbd_change_sync(struct drbd_peer_device *peer_device, sector_t sector, int size,
enum update_sync_bits_mode mode)
{
/* Is called from worker and receiver context _only_ */
+ struct drbd_device *device = peer_device->device;
unsigned long sbnr, ebnr, lbnr;
unsigned long count = 0;
sector_t esector, nr_sectors;
@@ -1009,14 +1011,15 @@ retry:
* tries to set it to BME_LOCKED. Returns 0 upon success, and -EAGAIN
* if there is still application IO going on in this area.
*/
-int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector)
+int drbd_try_rs_begin_io(struct drbd_peer_device *peer_device, sector_t sector)
{
+ struct drbd_device *device = peer_device->device;
unsigned int enr = BM_SECT_TO_EXT(sector);
const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT;
struct lc_element *e;
struct bm_extent *bm_ext;
int i;
- bool throttle = drbd_rs_should_slow_down(device, sector, true);
+ bool throttle = drbd_rs_should_slow_down(peer_device, sector, true);
/* If we need to throttle, a half-locked (only marked BME_NO_WRITES,
* not yet BME_LOCKED) extent needs to be kicked out explicitly if we
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 289876ffbc31..6ac8c54b44c7 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -1216,7 +1216,9 @@ static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned
* drbd_bm_read() - Read the whole bitmap from its on disk location.
* @device: DRBD device.
*/
-int drbd_bm_read(struct drbd_device *device) __must_hold(local)
+int drbd_bm_read(struct drbd_device *device,
+ struct drbd_peer_device *peer_device) __must_hold(local)
+
{
return bm_rw(device, BM_AIO_READ, 0);
}
@@ -1227,7 +1229,8 @@ int drbd_bm_read(struct drbd_device *device) __must_hold(local)
*
* Will only write pages that have changed since last IO.
*/
-int drbd_bm_write(struct drbd_device *device) __must_hold(local)
+int drbd_bm_write(struct drbd_device *device,
+ struct drbd_peer_device *peer_device) __must_hold(local)
{
return bm_rw(device, 0, 0);
}
@@ -1238,7 +1241,8 @@ int drbd_bm_write(struct drbd_device *device) __must_hold(local)
*
* Will write all pages.
*/
-int drbd_bm_write_all(struct drbd_device *device) __must_hold(local)
+int drbd_bm_write_all(struct drbd_device *device,
+ struct drbd_peer_device *peer_device) __must_hold(local)
{
return bm_rw(device, BM_AIO_WRITE_ALL_PAGES, 0);
}
@@ -1264,7 +1268,8 @@ int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_ho
* verify is aborted due to a failed peer disk, while local IO continues, or
* pending resync acks are still being processed.
*/
-int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local)
+int drbd_bm_write_copy_pages(struct drbd_device *device,
+ struct drbd_peer_device *peer_device) __must_hold(local)
{
return bm_rw(device, BM_AIO_COPY_PAGES, 0);
}
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index d89b7d03d4c8..a30a5ed811be 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -66,6 +66,7 @@ extern int drbd_proc_details;
struct drbd_device;
struct drbd_connection;
+struct drbd_peer_device;
/* Defines to control fault insertion */
enum {
@@ -126,8 +127,8 @@ struct bm_xfer_ctx {
unsigned bytes[2];
};
-extern void INFO_bm_xfer_stats(struct drbd_device *device,
- const char *direction, struct bm_xfer_ctx *c);
+extern void INFO_bm_xfer_stats(struct drbd_peer_device *peer_device,
+ const char *direction, struct bm_xfer_ctx *c);
static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c)
{
@@ -541,9 +542,10 @@ struct drbd_md_io {
struct bm_io_work {
struct drbd_work w;
+ struct drbd_peer_device *peer_device;
char *why;
enum bm_flag flags;
- int (*io_fn)(struct drbd_device *device);
+ int (*io_fn)(struct drbd_device *device, struct drbd_peer_device *peer_device);
void (*done)(struct drbd_device *device, int rv);
};
@@ -1041,7 +1043,7 @@ extern int drbd_send_drequest_csum(struct drbd_peer_device *, sector_t sector,
enum drbd_packet cmd);
extern int drbd_send_ov_request(struct drbd_peer_device *, sector_t sector, int size);
-extern int drbd_send_bitmap(struct drbd_device *device);
+extern int drbd_send_bitmap(struct drbd_device *device, struct drbd_peer_device *peer_device);
extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode);
extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode);
extern int drbd_send_rs_deallocated(struct drbd_peer_device *, struct drbd_peer_request *);
@@ -1065,17 +1067,22 @@ extern void drbd_md_clear_flag(struct drbd_device *device, int flags)__must_hold
extern int drbd_md_test_flag(struct drbd_backing_dev *, int);
extern void drbd_md_mark_dirty(struct drbd_device *device);
extern void drbd_queue_bitmap_io(struct drbd_device *device,
- int (*io_fn)(struct drbd_device *),
+ int (*io_fn)(struct drbd_device *, struct drbd_peer_device *),
void (*done)(struct drbd_device *, int),
- char *why, enum bm_flag flags);
+ char *why, enum bm_flag flags,
+ struct drbd_peer_device *peer_device);
extern int drbd_bitmap_io(struct drbd_device *device,
- int (*io_fn)(struct drbd_device *),
- char *why, enum bm_flag flags);
+ int (*io_fn)(struct drbd_device *, struct drbd_peer_device *),
+ char *why, enum bm_flag flags,
+ struct drbd_peer_device *peer_device);
extern int drbd_bitmap_io_from_worker(struct drbd_device *device,
- int (*io_fn)(struct drbd_device *),
- char *why, enum bm_flag flags);
-extern int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local);
-extern int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local);
+ int (*io_fn)(struct drbd_device *, struct drbd_peer_device *),
+ char *why, enum bm_flag flags,
+ struct drbd_peer_device *peer_device);
+extern int drbd_bmio_set_n_write(struct drbd_device *device,
+ struct drbd_peer_device *peer_device) __must_hold(local);
+extern int drbd_bmio_clear_n_write(struct drbd_device *device,
+ struct drbd_peer_device *peer_device) __must_hold(local);
/* Meta data layout
*
@@ -1284,14 +1291,18 @@ extern void _drbd_bm_set_bits(struct drbd_device *device,
const unsigned long s, const unsigned long e);
extern int drbd_bm_test_bit(struct drbd_device *device, unsigned long bitnr);
extern int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr);
-extern int drbd_bm_read(struct drbd_device *device) __must_hold(local);
+extern int drbd_bm_read(struct drbd_device *device,
+ struct drbd_peer_device *peer_device) __must_hold(local);
extern void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr);
-extern int drbd_bm_write(struct drbd_device *device) __must_hold(local);
+extern int drbd_bm_write(struct drbd_device *device,
+ struct drbd_peer_device *peer_device) __must_hold(local);
extern void drbd_bm_reset_al_hints(struct drbd_device *device) __must_hold(local);
extern int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local);
extern int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local);
-extern int drbd_bm_write_all(struct drbd_device *device) __must_hold(local);
-extern int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local);
+extern int drbd_bm_write_all(struct drbd_device *device,
+ struct drbd_peer_device *peer_device) __must_hold(local);
+extern int drbd_bm_write_copy_pages(struct drbd_device *device,
+ struct drbd_peer_device *peer_device) __must_hold(local);
extern size_t drbd_bm_words(struct drbd_device *device);
extern unsigned long drbd_bm_bits(struct drbd_device *device);
extern sector_t drbd_bm_capacity(struct drbd_device *device);
@@ -1422,21 +1433,24 @@ void drbd_resync_after_changed(struct drbd_device *device);
extern void drbd_start_resync(struct drbd_device *device, enum drbd_conns side);
extern void resume_next_sg(struct drbd_device *device);
extern void suspend_other_sg(struct drbd_device *device);
-extern int drbd_resync_finished(struct drbd_device *device);
+extern int drbd_resync_finished(struct drbd_peer_device *peer_device);
/* maybe rather drbd_main.c ? */
extern void *drbd_md_get_buffer(struct drbd_device *device, const char *intent);
extern void drbd_md_put_buffer(struct drbd_device *device);
extern int drbd_md_sync_page_io(struct drbd_device *device,
struct drbd_backing_dev *bdev, sector_t sector, enum req_op op);
-extern void drbd_ov_out_of_sync_found(struct drbd_device *, sector_t, int);
+extern void drbd_ov_out_of_sync_found(struct drbd_peer_device *peer_device,
+ sector_t sector, int size);
extern void wait_until_done_or_force_detached(struct drbd_device *device,
struct drbd_backing_dev *bdev, unsigned int *done);
-extern void drbd_rs_controller_reset(struct drbd_device *device);
+extern void drbd_rs_controller_reset(struct drbd_peer_device *peer_device);
-static inline void ov_out_of_sync_print(struct drbd_device *device)
+static inline void ov_out_of_sync_print(struct drbd_peer_device *peer_device)
{
+ struct drbd_device *device = peer_device->device;
+
if (device->ov_last_oos_size) {
- drbd_err(device, "Out of sync: start=%llu, size=%lu (sectors)\n",
+ drbd_err(peer_device, "Out of sync: start=%llu, size=%lu (sectors)\n",
(unsigned long long)device->ov_last_oos_start,
(unsigned long)device->ov_last_oos_size);
}
@@ -1475,7 +1489,7 @@ extern int drbd_ack_receiver(struct drbd_thread *thi);
extern void drbd_send_ping_wf(struct work_struct *ws);
extern void drbd_send_acks_wf(struct work_struct *ws);
extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device);
-extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
+extern bool drbd_rs_should_slow_down(struct drbd_peer_device *peer_device, sector_t sector,
bool throttle_if_app_is_waiting);
extern int drbd_submit_peer_request(struct drbd_peer_request *peer_req);
extern int drbd_free_peer_reqs(struct drbd_device *, struct list_head *);
@@ -1531,22 +1545,22 @@ extern void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i
extern void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i);
extern void drbd_rs_complete_io(struct drbd_device *device, sector_t sector);
extern int drbd_rs_begin_io(struct drbd_device *device, sector_t sector);
-extern int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector);
+extern int drbd_try_rs_begin_io(struct drbd_peer_device *peer_device, sector_t sector);
extern void drbd_rs_cancel_all(struct drbd_device *device);
extern int drbd_rs_del_all(struct drbd_device *device);
-extern void drbd_rs_failed_io(struct drbd_device *device,
+extern void drbd_rs_failed_io(struct drbd_peer_device *peer_device,
sector_t sector, int size);
-extern void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go);
+extern void drbd_advance_rs_marks(struct drbd_peer_device *peer_device, unsigned long still_to_go);
enum update_sync_bits_mode { RECORD_RS_FAILED, SET_OUT_OF_SYNC, SET_IN_SYNC };
-extern int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
+extern int __drbd_change_sync(struct drbd_peer_device *peer_device, sector_t sector, int size,
enum update_sync_bits_mode mode);
-#define drbd_set_in_sync(device, sector, size) \
- __drbd_change_sync(device, sector, size, SET_IN_SYNC)
-#define drbd_set_out_of_sync(device, sector, size) \
- __drbd_change_sync(device, sector, size, SET_OUT_OF_SYNC)
-#define drbd_rs_failed_io(device, sector, size) \
- __drbd_change_sync(device, sector, size, RECORD_RS_FAILED)
+#define drbd_set_in_sync(peer_device, sector, size) \
+ __drbd_change_sync(peer_device, sector, size, SET_IN_SYNC)
+#define drbd_set_out_of_sync(peer_device, sector, size) \
+ __drbd_change_sync(peer_device, sector, size, SET_OUT_OF_SYNC)
+#define drbd_rs_failed_io(peer_device, sector, size) \
+ __drbd_change_sync(peer_device, sector, size, RECORD_RS_FAILED)
extern void drbd_al_shrink(struct drbd_device *device);
extern int drbd_al_initialize(struct drbd_device *, void *);
@@ -1918,18 +1932,14 @@ static inline void inc_ap_pending(struct drbd_device *device)
atomic_inc(&device->ap_pending_cnt);
}
-#define ERR_IF_CNT_IS_NEGATIVE(which, func, line) \
- if (atomic_read(&device->which) < 0) \
- drbd_err(device, "in %s:%d: " #which " = %d < 0 !\n", \
- func, line, \
- atomic_read(&device->which))
-
-#define dec_ap_pending(device) _dec_ap_pending(device, __func__, __LINE__)
-static inline void _dec_ap_pending(struct drbd_device *device, const char *func, int line)
+#define dec_ap_pending(device) ((void)expect((device), __dec_ap_pending(device) >= 0))
+static inline int __dec_ap_pending(struct drbd_device *device)
{
- if (atomic_dec_and_test(&device->ap_pending_cnt))
+ int ap_pending_cnt = atomic_dec_return(&device->ap_pending_cnt);
+
+ if (ap_pending_cnt == 0)
wake_up(&device->misc_wait);
- ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt, func, line);
+ return ap_pending_cnt;
}
/* counts how many resync-related answers we still expect from the peer
@@ -1938,16 +1948,16 @@ static inline void _dec_ap_pending(struct drbd_device *device, const char *func,
* C_SYNC_SOURCE sends P_RS_DATA_REPLY (and expects P_WRITE_ACK with ID_SYNCER)
* (or P_NEG_ACK with ID_SYNCER)
*/
-static inline void inc_rs_pending(struct drbd_device *device)
+static inline void inc_rs_pending(struct drbd_peer_device *peer_device)
{
- atomic_inc(&device->rs_pending_cnt);
+ atomic_inc(&peer_device->device->rs_pending_cnt);
}
-#define dec_rs_pending(device) _dec_rs_pending(device, __func__, __LINE__)
-static inline void _dec_rs_pending(struct drbd_device *device, const char *func, int line)
+#define dec_rs_pending(peer_device) \
+ ((void)expect((peer_device), __dec_rs_pending(peer_device) >= 0))
+static inline int __dec_rs_pending(struct drbd_peer_device *peer_device)
{
- atomic_dec(&device->rs_pending_cnt);
- ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt, func, line);
+ return atomic_dec_return(&peer_device->device->rs_pending_cnt);
}
/* counts how many answers we still need to send to the peer.
@@ -1964,18 +1974,16 @@ static inline void inc_unacked(struct drbd_device *device)
atomic_inc(&device->unacked_cnt);
}
-#define dec_unacked(device) _dec_unacked(device, __func__, __LINE__)
-static inline void _dec_unacked(struct drbd_device *device, const char *func, int line)
+#define dec_unacked(device) ((void)expect(device, __dec_unacked(device) >= 0))
+static inline int __dec_unacked(struct drbd_device *device)
{
- atomic_dec(&device->unacked_cnt);
- ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line);
+ return atomic_dec_return(&device->unacked_cnt);
}
-#define sub_unacked(device, n) _sub_unacked(device, n, __func__, __LINE__)
-static inline void _sub_unacked(struct drbd_device *device, int n, const char *func, int line)
+#define sub_unacked(device, n) ((void)expect(device, __sub_unacked(device) >= 0))
+static inline int __sub_unacked(struct drbd_device *device, int n)
{
- atomic_sub(n, &device->unacked_cnt);
- ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line);
+ return atomic_sub_return(n, &device->unacked_cnt);
}
static inline bool is_sync_target_state(enum drbd_conns connection_state)
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 2c764f7ee4a7..83987e7a5ef2 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -231,9 +231,11 @@ void tl_release(struct drbd_connection *connection, unsigned int barrier_nr,
}
req = list_prepare_entry(tmp, &connection->transfer_log, tl_requests);
list_for_each_entry_safe_from(req, r, &connection->transfer_log, tl_requests) {
+ struct drbd_peer_device *peer_device;
if (req->epoch != expect_epoch)
break;
- _req_mod(req, BARRIER_ACKED);
+ peer_device = conn_peer_device(connection, req->device->vnr);
+ _req_mod(req, BARRIER_ACKED, peer_device);
}
spin_unlock_irq(&connection->resource->req_lock);
@@ -256,10 +258,13 @@ bail:
/* must hold resource->req_lock */
void _tl_restart(struct drbd_connection *connection, enum drbd_req_event what)
{
+ struct drbd_peer_device *peer_device;
struct drbd_request *req, *r;
- list_for_each_entry_safe(req, r, &connection->transfer_log, tl_requests)
- _req_mod(req, what);
+ list_for_each_entry_safe(req, r, &connection->transfer_log, tl_requests) {
+ peer_device = conn_peer_device(connection, req->device->vnr);
+ _req_mod(req, what, peer_device);
+ }
}
void tl_restart(struct drbd_connection *connection, enum drbd_req_event what)
@@ -297,7 +302,7 @@ void tl_abort_disk_io(struct drbd_device *device)
continue;
if (req->device != device)
continue;
- _req_mod(req, ABORT_DISK_IO);
+ _req_mod(req, ABORT_DISK_IO, NULL);
}
spin_unlock_irq(&connection->resource->req_lock);
}
@@ -1198,10 +1203,11 @@ static int fill_bitmap_rle_bits(struct drbd_device *device,
* code upon failure.
*/
static int
-send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c)
+send_bitmap_rle_or_plain(struct drbd_peer_device *peer_device, struct bm_xfer_ctx *c)
{
- struct drbd_socket *sock = &first_peer_device(device)->connection->data;
- unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
+ struct drbd_device *device = peer_device->device;
+ struct drbd_socket *sock = &peer_device->connection->data;
+ unsigned int header_size = drbd_header_size(peer_device->connection);
struct p_compressed_bm *p = sock->sbuf + header_size;
int len, err;
@@ -1212,7 +1218,7 @@ send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c)
if (len) {
dcbp_set_code(p, RLE_VLI_Bits);
- err = __send_command(first_peer_device(device)->connection, device->vnr, sock,
+ err = __send_command(peer_device->connection, device->vnr, sock,
P_COMPRESSED_BITMAP, sizeof(*p) + len,
NULL, 0);
c->packets[0]++;
@@ -1233,7 +1239,8 @@ send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c)
len = num_words * sizeof(*p);
if (len)
drbd_bm_get_lel(device, c->word_offset, num_words, p);
- err = __send_command(first_peer_device(device)->connection, device->vnr, sock, P_BITMAP, len, NULL, 0);
+ err = __send_command(peer_device->connection, device->vnr, sock, P_BITMAP,
+ len, NULL, 0);
c->word_offset += num_words;
c->bit_offset = c->word_offset * BITS_PER_LONG;
@@ -1245,7 +1252,7 @@ send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c)
}
if (!err) {
if (len == 0) {
- INFO_bm_xfer_stats(device, "send", c);
+ INFO_bm_xfer_stats(peer_device, "send", c);
return 0;
} else
return 1;
@@ -1254,7 +1261,8 @@ send_bitmap_rle_or_plain(struct drbd_device *device, struct bm_xfer_ctx *c)
}
/* See the comment at receive_bitmap() */
-static int _drbd_send_bitmap(struct drbd_device *device)
+static int _drbd_send_bitmap(struct drbd_device *device,
+ struct drbd_peer_device *peer_device)
{
struct bm_xfer_ctx c;
int err;
@@ -1266,7 +1274,7 @@ static int _drbd_send_bitmap(struct drbd_device *device)
if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC)) {
drbd_info(device, "Writing the whole bitmap, MDF_FullSync was set.\n");
drbd_bm_set_all(device);
- if (drbd_bm_write(device)) {
+ if (drbd_bm_write(device, peer_device)) {
/* write_bm did fail! Leave full sync flag set in Meta P_DATA
* but otherwise process as per normal - need to tell other
* side that a full resync is required! */
@@ -1285,20 +1293,20 @@ static int _drbd_send_bitmap(struct drbd_device *device)
};
do {
- err = send_bitmap_rle_or_plain(device, &c);
+ err = send_bitmap_rle_or_plain(peer_device, &c);
} while (err > 0);
return err == 0;
}
-int drbd_send_bitmap(struct drbd_device *device)
+int drbd_send_bitmap(struct drbd_device *device, struct drbd_peer_device *peer_device)
{
- struct drbd_socket *sock = &first_peer_device(device)->connection->data;
+ struct drbd_socket *sock = &peer_device->connection->data;
int err = -1;
mutex_lock(&sock->mutex);
if (sock->socket)
- err = !_drbd_send_bitmap(device);
+ err = !_drbd_send_bitmap(device, peer_device);
mutex_unlock(&sock->mutex);
return err;
}
@@ -3406,7 +3414,9 @@ void drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local)
*
* Sets all bits in the bitmap and writes the whole bitmap to stable storage.
*/
-int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local)
+int drbd_bmio_set_n_write(struct drbd_device *device,
+ struct drbd_peer_device *peer_device) __must_hold(local)
+
{
int rv = -EIO;
@@ -3414,7 +3424,7 @@ int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local)
drbd_md_sync(device);
drbd_bm_set_all(device);
- rv = drbd_bm_write(device);
+ rv = drbd_bm_write(device, peer_device);
if (!rv) {
drbd_md_clear_flag(device, MDF_FULL_SYNC);
@@ -3430,11 +3440,13 @@ int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local)
*
* Clears all bits in the bitmap and writes the whole bitmap to stable storage.
*/
-int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local)
+int drbd_bmio_clear_n_write(struct drbd_device *device,
+ struct drbd_peer_device *peer_device) __must_hold(local)
+
{
drbd_resume_al(device);
drbd_bm_clear_all(device);
- return drbd_bm_write(device);
+ return drbd_bm_write(device, peer_device);
}
static int w_bitmap_io(struct drbd_work *w, int unused)
@@ -3453,7 +3465,7 @@ static int w_bitmap_io(struct drbd_work *w, int unused)
if (get_ldev(device)) {
drbd_bm_lock(device, work->why, work->flags);
- rv = work->io_fn(device);
+ rv = work->io_fn(device, work->peer_device);
drbd_bm_unlock(device);
put_ldev(device);
}
@@ -3488,11 +3500,12 @@ static int w_bitmap_io(struct drbd_work *w, int unused)
* put_ldev().
*/
void drbd_queue_bitmap_io(struct drbd_device *device,
- int (*io_fn)(struct drbd_device *),
+ int (*io_fn)(struct drbd_device *, struct drbd_peer_device *),
void (*done)(struct drbd_device *, int),
- char *why, enum bm_flag flags)
+ char *why, enum bm_flag flags,
+ struct drbd_peer_device *peer_device)
{
- D_ASSERT(device, current == first_peer_device(device)->connection->worker.task);
+ D_ASSERT(device, current == peer_device->connection->worker.task);
D_ASSERT(device, !test_bit(BITMAP_IO_QUEUED, &device->flags));
D_ASSERT(device, !test_bit(BITMAP_IO, &device->flags));
@@ -3501,6 +3514,7 @@ void drbd_queue_bitmap_io(struct drbd_device *device,
drbd_err(device, "FIXME going to queue '%s' but '%s' still pending?\n",
why, device->bm_io_work.why);
+ device->bm_io_work.peer_device = peer_device;
device->bm_io_work.io_fn = io_fn;
device->bm_io_work.done = done;
device->bm_io_work.why = why;
@@ -3512,7 +3526,7 @@ void drbd_queue_bitmap_io(struct drbd_device *device,
* application IO does not conflict anyways. */
if (flags == BM_LOCKED_CHANGE_ALLOWED || atomic_read(&device->ap_bio_cnt) == 0) {
if (!test_and_set_bit(BITMAP_IO_QUEUED, &device->flags))
- drbd_queue_work(&first_peer_device(device)->connection->sender_work,
+ drbd_queue_work(&peer_device->connection->sender_work,
&device->bm_io_work.w);
}
spin_unlock_irq(&device->resource->req_lock);
@@ -3528,8 +3542,10 @@ void drbd_queue_bitmap_io(struct drbd_device *device,
* freezes application IO while that the actual IO operations runs. This
* functions MAY NOT be called from worker context.
*/
-int drbd_bitmap_io(struct drbd_device *device, int (*io_fn)(struct drbd_device *),
- char *why, enum bm_flag flags)
+int drbd_bitmap_io(struct drbd_device *device,
+ int (*io_fn)(struct drbd_device *, struct drbd_peer_device *),
+ char *why, enum bm_flag flags,
+ struct drbd_peer_device *peer_device)
{
/* Only suspend io, if some operation is supposed to be locked out */
const bool do_suspend_io = flags & (BM_DONT_CLEAR|BM_DONT_SET|BM_DONT_TEST);
@@ -3541,7 +3557,7 @@ int drbd_bitmap_io(struct drbd_device *device, int (*io_fn)(struct drbd_device *
drbd_suspend_io(device);
drbd_bm_lock(device, why, flags);
- rv = io_fn(device);
+ rv = io_fn(device, peer_device);
drbd_bm_unlock(device);
if (do_suspend_io)
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index f49f2a5282e1..1a5d3d72d91d 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1053,7 +1053,7 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
la_size_changed ? "size changed" : "md moved");
/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
- "size changed", BM_LOCKED_MASK);
+ "size changed", BM_LOCKED_MASK, NULL);
/* on-disk bitmap and activity log is authoritative again
* (unless there was an IO error meanwhile...) */
@@ -2027,13 +2027,15 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
drbd_info(device, "Assuming that all blocks are out of sync "
"(aka FullSync)\n");
if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
- "set_n_write from attaching", BM_LOCKED_MASK)) {
+ "set_n_write from attaching", BM_LOCKED_MASK,
+ NULL)) {
retcode = ERR_IO_MD_DISK;
goto force_diskless_dec;
}
} else {
if (drbd_bitmap_io(device, &drbd_bm_read,
- "read from attaching", BM_LOCKED_MASK)) {
+ "read from attaching", BM_LOCKED_MASK,
+ NULL)) {
retcode = ERR_IO_MD_DISK;
goto force_diskless_dec;
}
@@ -2972,7 +2974,7 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT));
if (retcode >= SS_SUCCESS) {
if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
- "set_n_write from invalidate", BM_LOCKED_MASK))
+ "set_n_write from invalidate", BM_LOCKED_MASK, NULL))
retcode = ERR_IO_MD_DISK;
}
} else
@@ -3005,11 +3007,12 @@ out:
return 0;
}
-static int drbd_bmio_set_susp_al(struct drbd_device *device) __must_hold(local)
+static int drbd_bmio_set_susp_al(struct drbd_device *device,
+ struct drbd_peer_device *peer_device) __must_hold(local)
{
int rv;
- rv = drbd_bmio_set_n_write(device);
+ rv = drbd_bmio_set_n_write(device, peer_device);
drbd_suspend_al(device);
return rv;
}
@@ -3052,7 +3055,7 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
if (retcode >= SS_SUCCESS) {
if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al,
"set_n_write from invalidate_peer",
- BM_LOCKED_SET_ALLOWED))
+ BM_LOCKED_SET_ALLOWED, NULL))
retcode = ERR_IO_MD_DISK;
}
} else
@@ -4148,7 +4151,7 @@ int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
if (args.clear_bm) {
err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
- "clear_n_write from new_c_uuid", BM_LOCKED_MASK);
+ "clear_n_write from new_c_uuid", BM_LOCKED_MASK, NULL);
if (err) {
drbd_err(device, "Writing bitmap failed with %d\n", err);
retcode = ERR_IO_MD_DISK;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index e197b2a465d2..719a7260c22b 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -2044,11 +2044,11 @@ static int e_end_resync_block(struct drbd_work *w, int unused)
D_ASSERT(device, drbd_interval_empty(&peer_req->i));
if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
- drbd_set_in_sync(device, sector, peer_req->i.size);
+ drbd_set_in_sync(peer_device, sector, peer_req->i.size);
err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
} else {
/* Record failure to sync */
- drbd_rs_failed_io(device, sector, peer_req->i.size);
+ drbd_rs_failed_io(peer_device, sector, peer_req->i.size);
err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
}
@@ -2067,7 +2067,7 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t secto
if (!peer_req)
goto fail;
- dec_rs_pending(device);
+ dec_rs_pending(peer_device);
inc_unacked(device);
/* corresponding dec_unacked() in e_end_resync_block()
@@ -2138,7 +2138,7 @@ static int receive_DataReply(struct drbd_connection *connection, struct packet_i
err = recv_dless_read(peer_device, req, sector, pi->size);
if (!err)
- req_mod(req, DATA_RECEIVED);
+ req_mod(req, DATA_RECEIVED, peer_device);
/* else: nothing. handled from drbd_disconnect...
* I don't think we may complete this just yet
* in case we are "on-disconnect: freeze" */
@@ -2196,7 +2196,7 @@ static void restart_conflicting_writes(struct drbd_device *device,
continue;
/* as it is RQ_POSTPONED, this will cause it to
* be queued on the retry workqueue. */
- __req_mod(req, CONFLICT_RESOLVED, NULL);
+ __req_mod(req, CONFLICT_RESOLVED, NULL, NULL);
}
}
@@ -2220,7 +2220,7 @@ static int e_end_block(struct drbd_work *w, int cancel)
P_RS_WRITE_ACK : P_WRITE_ACK;
err = drbd_send_ack(peer_device, pcmd, peer_req);
if (pcmd == P_RS_WRITE_ACK)
- drbd_set_in_sync(device, sector, peer_req->i.size);
+ drbd_set_in_sync(peer_device, sector, peer_req->i.size);
} else {
err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
/* we expect it to be marked out of sync anyways...
@@ -2420,6 +2420,7 @@ static blk_opf_t wire_flags_to_bio(struct drbd_connection *connection, u32 dpf)
static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
unsigned int size)
{
+ struct drbd_peer_device *peer_device = first_peer_device(device);
struct drbd_interval *i;
repeat:
@@ -2433,7 +2434,7 @@ static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
if (!(req->rq_state & RQ_POSTPONED))
continue;
req->rq_state &= ~RQ_POSTPONED;
- __req_mod(req, NEG_ACKED, &m);
+ __req_mod(req, NEG_ACKED, peer_device, &m);
spin_unlock_irq(&device->resource->req_lock);
if (m.bio)
complete_master_bio(device, &m);
@@ -2690,7 +2691,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
if (device->state.pdsk < D_INCONSISTENT) {
/* In case we have the only disk of the cluster, */
- drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
+ drbd_set_out_of_sync(peer_device, peer_req->i.sector, peer_req->i.size);
peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
drbd_al_begin_io(device, &peer_req->i);
peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
@@ -2729,9 +2730,10 @@ out_interrupted:
* The current sync rate used here uses only the most recent two step marks,
* to have a short time average so we can react faster.
*/
-bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
+bool drbd_rs_should_slow_down(struct drbd_peer_device *peer_device, sector_t sector,
bool throttle_if_app_is_waiting)
{
+ struct drbd_device *device = peer_device->device;
struct lc_element *tmp;
bool throttle = drbd_rs_c_min_rate_throttle(device);
@@ -2843,7 +2845,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
break;
case P_OV_REPLY:
verb = 0;
- dec_rs_pending(device);
+ dec_rs_pending(peer_device);
drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
break;
default:
@@ -2914,7 +2916,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
/* track progress, we may need to throttle */
atomic_add(size >> 9, &device->rs_sect_in);
peer_req->w.cb = w_e_end_ov_reply;
- dec_rs_pending(device);
+ dec_rs_pending(peer_device);
/* drbd_rs_begin_io done when we sent this request,
* but accounting still needs to be done. */
goto submit_for_resync;
@@ -2977,7 +2979,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
update_receiver_timing_details(connection, drbd_rs_should_slow_down);
if (device->state.peer != R_PRIMARY
- && drbd_rs_should_slow_down(device, sector, false))
+ && drbd_rs_should_slow_down(peer_device, sector, false))
schedule_timeout_uninterruptible(HZ/10);
update_receiver_timing_details(connection, drbd_rs_begin_io);
if (drbd_rs_begin_io(device, sector))
@@ -3226,10 +3228,11 @@ static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
-1096 requires proto 96
*/
-static int drbd_uuid_compare(struct drbd_device *const device, enum drbd_role const peer_role, int *rule_nr) __must_hold(local)
+static int drbd_uuid_compare(struct drbd_peer_device *const peer_device,
+ enum drbd_role const peer_role, int *rule_nr) __must_hold(local)
{
- struct drbd_peer_device *const peer_device = first_peer_device(device);
- struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
+ struct drbd_connection *const connection = peer_device->connection;
+ struct drbd_device *device = peer_device->device;
u64 self, peer;
int i, j;
@@ -3465,7 +3468,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
drbd_uuid_dump(device, "peer", device->p_uuid,
device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
- hg = drbd_uuid_compare(device, peer_role, &rule_nr);
+ hg = drbd_uuid_compare(peer_device, peer_role, &rule_nr);
spin_unlock_irq(&device->ldev->md.uuid_lock);
drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
@@ -3591,7 +3594,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
if (abs(hg) >= 2) {
drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
- BM_LOCKED_SET_ALLOWED))
+ BM_LOCKED_SET_ALLOWED, NULL))
return C_MASK;
}
@@ -4270,7 +4273,7 @@ static int receive_uuids(struct drbd_connection *connection, struct packet_info
drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
"clear_n_write from receive_uuids",
- BM_LOCKED_TEST_ALLOWED);
+ BM_LOCKED_TEST_ALLOWED, NULL);
_drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
_drbd_uuid_set(device, UI_BITMAP, 0);
_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
@@ -4448,7 +4451,7 @@ static int receive_state(struct drbd_connection *connection, struct packet_info
else if (os.conn >= C_SYNC_SOURCE &&
peer_state.conn == C_CONNECTED) {
if (drbd_bm_total_weight(device) <= device->rs_failed)
- drbd_resync_finished(device);
+ drbd_resync_finished(peer_device);
return 0;
}
}
@@ -4456,8 +4459,8 @@ static int receive_state(struct drbd_connection *connection, struct packet_info
/* explicit verify finished notification, stop sector reached. */
if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
- ov_out_of_sync_print(device);
- drbd_resync_finished(device);
+ ov_out_of_sync_print(peer_device);
+ drbd_resync_finished(peer_device);
return 0;
}
@@ -4766,11 +4769,11 @@ decode_bitmap_c(struct drbd_peer_device *peer_device,
return -EIO;
}
-void INFO_bm_xfer_stats(struct drbd_device *device,
+void INFO_bm_xfer_stats(struct drbd_peer_device *peer_device,
const char *direction, struct bm_xfer_ctx *c)
{
/* what would it take to transfer it "plaintext" */
- unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
+ unsigned int header_size = drbd_header_size(peer_device->connection);
unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
unsigned int plain =
header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
@@ -4794,7 +4797,7 @@ void INFO_bm_xfer_stats(struct drbd_device *device,
r = 1000;
r = 1000 - r;
- drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
+ drbd_info(peer_device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
"total %u; compression: %u.%u%%\n",
direction,
c->bytes[1], c->packets[1],
@@ -4872,12 +4875,12 @@ static int receive_bitmap(struct drbd_connection *connection, struct packet_info
goto out;
}
- INFO_bm_xfer_stats(device, "receive", &c);
+ INFO_bm_xfer_stats(peer_device, "receive", &c);
if (device->state.conn == C_WF_BITMAP_T) {
enum drbd_state_rv rv;
- err = drbd_send_bitmap(device);
+ err = drbd_send_bitmap(device, peer_device);
if (err)
goto out;
/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
@@ -4935,7 +4938,7 @@ static int receive_out_of_sync(struct drbd_connection *connection, struct packet
drbd_conn_str(device->state.conn));
}
- drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
+ drbd_set_out_of_sync(peer_device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
return 0;
}
@@ -4956,7 +4959,7 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac
sector = be64_to_cpu(p->sector);
size = be32_to_cpu(p->blksize);
- dec_rs_pending(device);
+ dec_rs_pending(peer_device);
if (get_ldev(device)) {
struct drbd_peer_request *peer_req;
@@ -5214,7 +5217,7 @@ static int drbd_disconnected(struct drbd_peer_device *peer_device)
if (get_ldev(device)) {
drbd_bitmap_io(device, &drbd_bm_write_copy_pages,
- "write from disconnected", BM_LOCKED_CHANGE_ALLOWED);
+ "write from disconnected", BM_LOCKED_CHANGE_ALLOWED, NULL);
put_ldev(device);
}
@@ -5648,22 +5651,23 @@ static int got_IsInSync(struct drbd_connection *connection, struct packet_info *
if (get_ldev(device)) {
drbd_rs_complete_io(device, sector);
- drbd_set_in_sync(device, sector, blksize);
+ drbd_set_in_sync(peer_device, sector, blksize);
/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
put_ldev(device);
}
- dec_rs_pending(device);
+ dec_rs_pending(peer_device);
atomic_add(blksize >> 9, &device->rs_sect_in);
return 0;
}
static int
-validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
+validate_req_change_req_state(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
struct rb_root *root, const char *func,
enum drbd_req_event what, bool missing_ok)
{
+ struct drbd_device *device = peer_device->device;
struct drbd_request *req;
struct bio_and_error m;
@@ -5673,7 +5677,7 @@ validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t secto
spin_unlock_irq(&device->resource->req_lock);
return -EIO;
}
- __req_mod(req, what, &m);
+ __req_mod(req, what, peer_device, &m);
spin_unlock_irq(&device->resource->req_lock);
if (m.bio)
@@ -5698,8 +5702,8 @@ static int got_BlockAck(struct drbd_connection *connection, struct packet_info *
update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
if (p->block_id == ID_SYNCER) {
- drbd_set_in_sync(device, sector, blksize);
- dec_rs_pending(device);
+ drbd_set_in_sync(peer_device, sector, blksize);
+ dec_rs_pending(peer_device);
return 0;
}
switch (pi->cmd) {
@@ -5722,7 +5726,7 @@ static int got_BlockAck(struct drbd_connection *connection, struct packet_info *
BUG();
}
- return validate_req_change_req_state(device, p->block_id, sector,
+ return validate_req_change_req_state(peer_device, p->block_id, sector,
&device->write_requests, __func__,
what, false);
}
@@ -5744,12 +5748,12 @@ static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi
update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
if (p->block_id == ID_SYNCER) {
- dec_rs_pending(device);
- drbd_rs_failed_io(device, sector, size);
+ dec_rs_pending(peer_device);
+ drbd_rs_failed_io(peer_device, sector, size);
return 0;
}
- err = validate_req_change_req_state(device, p->block_id, sector,
+ err = validate_req_change_req_state(peer_device, p->block_id, sector,
&device->write_requests, __func__,
NEG_ACKED, true);
if (err) {
@@ -5758,7 +5762,7 @@ static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi
request is no longer in the collision hash. */
/* In Protocol B we might already have got a P_RECV_ACK
but then get a P_NEG_ACK afterwards. */
- drbd_set_out_of_sync(device, sector, size);
+ drbd_set_out_of_sync(peer_device, sector, size);
}
return 0;
}
@@ -5780,7 +5784,7 @@ static int got_NegDReply(struct drbd_connection *connection, struct packet_info
drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
(unsigned long long)sector, be32_to_cpu(p->blksize));
- return validate_req_change_req_state(device, p->block_id, sector,
+ return validate_req_change_req_state(peer_device, p->block_id, sector,
&device->read_requests, __func__,
NEG_ACKED, false);
}
@@ -5803,13 +5807,13 @@ static int got_NegRSDReply(struct drbd_connection *connection, struct packet_inf
update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
- dec_rs_pending(device);
+ dec_rs_pending(peer_device);
if (get_ldev_if_state(device, D_FAILED)) {
drbd_rs_complete_io(device, sector);
switch (pi->cmd) {
case P_NEG_RS_DREPLY:
- drbd_rs_failed_io(device, sector, size);
+ drbd_rs_failed_io(peer_device, sector, size);
break;
case P_RS_CANCEL:
break;
@@ -5866,21 +5870,21 @@ static int got_OVResult(struct drbd_connection *connection, struct packet_info *
update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
- drbd_ov_out_of_sync_found(device, sector, size);
+ drbd_ov_out_of_sync_found(peer_device, sector, size);
else
- ov_out_of_sync_print(device);
+ ov_out_of_sync_print(peer_device);
if (!get_ldev(device))
return 0;
drbd_rs_complete_io(device, sector);
- dec_rs_pending(device);
+ dec_rs_pending(peer_device);
--device->ov_left;
/* let's advance progress step marks only for every other megabyte */
if ((device->ov_left & 0x200) == 0x200)
- drbd_advance_rs_marks(device, device->ov_left);
+ drbd_advance_rs_marks(peer_device, device->ov_left);
if (device->ov_left == 0) {
dw = kmalloc(sizeof(*dw), GFP_NOIO);
@@ -5890,8 +5894,8 @@ static int got_OVResult(struct drbd_connection *connection, struct packet_info *
drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
} else {
drbd_err(device, "kmalloc(dw) failed.");
- ov_out_of_sync_print(device);
- drbd_resync_finished(device);
+ ov_out_of_sync_print(peer_device);
+ drbd_resync_finished(peer_device);
}
}
put_ldev(device);
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index e36216d50753..380e6584a4ee 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -122,12 +122,13 @@ void drbd_req_destroy(struct kref *kref)
* before it even was submitted or sent.
* In that case we do not want to touch the bitmap at all.
*/
+ struct drbd_peer_device *peer_device = first_peer_device(device);
if ((s & (RQ_POSTPONED|RQ_LOCAL_MASK|RQ_NET_MASK)) != RQ_POSTPONED) {
if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK))
- drbd_set_out_of_sync(device, req->i.sector, req->i.size);
+ drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size);
if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS))
- drbd_set_in_sync(device, req->i.sector, req->i.size);
+ drbd_set_in_sync(peer_device, req->i.sector, req->i.size);
}
/* one might be tempted to move the drbd_al_complete_io
@@ -552,12 +553,15 @@ static inline bool is_pending_write_protocol_A(struct drbd_request *req)
* happen "atomically" within the req_lock,
* and it enforces that we have to think in a very structured manner
* about the "events" that may happen to a request during its life time ...
+ *
+ *
+ * peer_device == NULL means local disk
*/
int __req_mod(struct drbd_request *req, enum drbd_req_event what,
+ struct drbd_peer_device *peer_device,
struct bio_and_error *m)
{
struct drbd_device *const device = req->device;
- struct drbd_peer_device *const peer_device = first_peer_device(device);
struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
struct net_conf *nc;
int p, rv = 0;
@@ -617,7 +621,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
break;
case READ_COMPLETED_WITH_ERROR:
- drbd_set_out_of_sync(device, req->i.sector, req->i.size);
+ drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size);
drbd_report_io_error(device, req);
__drbd_chk_io_error(device, DRBD_READ_ERROR);
fallthrough;
@@ -1100,6 +1104,7 @@ static bool drbd_should_send_out_of_sync(union drbd_dev_state s)
static int drbd_process_write_request(struct drbd_request *req)
{
struct drbd_device *device = req->device;
+ struct drbd_peer_device *peer_device = first_peer_device(device);
int remote, send_oos;
remote = drbd_should_do_remote(device->state);
@@ -1115,7 +1120,7 @@ static int drbd_process_write_request(struct drbd_request *req)
/* The only size==0 bios we expect are empty flushes. */
D_ASSERT(device, req->master_bio->bi_opf & REQ_PREFLUSH);
if (remote)
- _req_mod(req, QUEUE_AS_DRBD_BARRIER);
+ _req_mod(req, QUEUE_AS_DRBD_BARRIER, peer_device);
return remote;
}
@@ -1125,10 +1130,10 @@ static int drbd_process_write_request(struct drbd_request *req)
D_ASSERT(device, !(remote && send_oos));
if (remote) {
- _req_mod(req, TO_BE_SENT);
- _req_mod(req, QUEUE_FOR_NET_WRITE);
- } else if (drbd_set_out_of_sync(device, req->i.sector, req->i.size))
- _req_mod(req, QUEUE_FOR_SEND_OOS);
+ _req_mod(req, TO_BE_SENT, peer_device);
+ _req_mod(req, QUEUE_FOR_NET_WRITE, peer_device);
+ } else if (drbd_set_out_of_sync(peer_device, req->i.sector, req->i.size))
+ _req_mod(req, QUEUE_FOR_SEND_OOS, peer_device);
return remote;
}
@@ -1312,6 +1317,7 @@ static void drbd_update_plug(struct drbd_plug_cb *plug, struct drbd_request *req
static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req)
{
struct drbd_resource *resource = device->resource;
+ struct drbd_peer_device *peer_device = first_peer_device(device);
const int rw = bio_data_dir(req->master_bio);
struct bio_and_error m = { NULL, };
bool no_remote = false;
@@ -1375,8 +1381,8 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request
/* We either have a private_bio, or we can read from remote.
* Otherwise we had done the goto nodata above. */
if (req->private_bio == NULL) {
- _req_mod(req, TO_BE_SENT);
- _req_mod(req, QUEUE_FOR_NET_READ);
+ _req_mod(req, TO_BE_SENT, peer_device);
+ _req_mod(req, QUEUE_FOR_NET_READ, peer_device);
} else
no_remote = true;
}
@@ -1397,7 +1403,7 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request
req->pre_submit_jif = jiffies;
list_add_tail(&req->req_pending_local,
&device->pending_completion[rw == WRITE]);
- _req_mod(req, TO_BE_SUBMITTED);
+ _req_mod(req, TO_BE_SUBMITTED, NULL);
/* but we need to give up the spinlock to submit */
submit_private_bio = true;
} else if (no_remote) {
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index b4017b5c3fbc..9ae860e7591b 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -267,6 +267,7 @@ struct bio_and_error {
extern void start_new_tl_epoch(struct drbd_connection *connection);
extern void drbd_req_destroy(struct kref *kref);
extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
+ struct drbd_peer_device *peer_device,
struct bio_and_error *m);
extern void complete_master_bio(struct drbd_device *device,
struct bio_and_error *m);
@@ -280,14 +281,15 @@ extern void drbd_restart_request(struct drbd_request *req);
/* use this if you don't want to deal with calling complete_master_bio()
* outside the spinlock, e.g. when walking some list on cleanup. */
-static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what)
+static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what,
+ struct drbd_peer_device *peer_device)
{
struct drbd_device *device = req->device;
struct bio_and_error m;
int rv;
/* __req_mod possibly frees req, do not touch req after that! */
- rv = __req_mod(req, what, &m);
+ rv = __req_mod(req, what, peer_device, &m);
if (m.bio)
complete_master_bio(device, &m);
@@ -299,7 +301,8 @@ static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what)
* of the lower level driver completion callback, so we need to
* spin_lock_irqsave here. */
static inline int req_mod(struct drbd_request *req,
- enum drbd_req_event what)
+ enum drbd_req_event what,
+ struct drbd_peer_device *peer_device)
{
unsigned long flags;
struct drbd_device *device = req->device;
@@ -307,7 +310,7 @@ static inline int req_mod(struct drbd_request *req,
int rv;
spin_lock_irqsave(&device->resource->req_lock, flags);
- rv = __req_mod(req, what, &m);
+ rv = __req_mod(req, what, peer_device, &m);
spin_unlock_irqrestore(&device->resource->req_lock, flags);
if (m.bio)
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 2aeea295fa28..287a8d1d3f70 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -1222,9 +1222,11 @@ void drbd_resume_al(struct drbd_device *device)
}
/* helper for _drbd_set_state */
-static void set_ov_position(struct drbd_device *device, enum drbd_conns cs)
+static void set_ov_position(struct drbd_peer_device *peer_device, enum drbd_conns cs)
{
- if (first_peer_device(device)->connection->agreed_pro_version < 90)
+ struct drbd_device *device = peer_device->device;
+
+ if (peer_device->connection->agreed_pro_version < 90)
device->ov_start_sector = 0;
device->rs_total = drbd_bm_bits(device);
device->ov_position = 0;
@@ -1387,7 +1389,7 @@ _drbd_set_state(struct drbd_device *device, union drbd_state ns,
unsigned long now = jiffies;
int i;
- set_ov_position(device, ns.conn);
+ set_ov_position(peer_device, ns.conn);
device->rs_start = now;
device->rs_last_sect_ev = 0;
device->ov_last_oos_size = 0;
@@ -1398,7 +1400,7 @@ _drbd_set_state(struct drbd_device *device, union drbd_state ns,
device->rs_mark_time[i] = now;
}
- drbd_rs_controller_reset(device);
+ drbd_rs_controller_reset(peer_device);
if (ns.conn == C_VERIFY_S) {
drbd_info(device, "Starting Online Verify from sector %llu\n",
@@ -1518,8 +1520,9 @@ static void abw_start_sync(struct drbd_device *device, int rv)
}
int drbd_bitmap_io_from_worker(struct drbd_device *device,
- int (*io_fn)(struct drbd_device *),
- char *why, enum bm_flag flags)
+ int (*io_fn)(struct drbd_device *, struct drbd_peer_device *),
+ char *why, enum bm_flag flags,
+ struct drbd_peer_device *peer_device)
{
int rv;
@@ -1529,7 +1532,7 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device,
atomic_inc(&device->suspend_cnt);
drbd_bm_lock(device, why, flags);
- rv = io_fn(device);
+ rv = io_fn(device, peer_device);
drbd_bm_unlock(device);
drbd_resume_io(device);
@@ -1809,7 +1812,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
device->state.conn == C_WF_BITMAP_S)
drbd_queue_bitmap_io(device, &drbd_send_bitmap, NULL,
"send_bitmap (WFBitMapS)",
- BM_LOCKED_TEST_ALLOWED);
+ BM_LOCKED_TEST_ALLOWED, peer_device);
/* Lost contact to peer's copy of the data */
if (lost_contact_to_peer_data(os.pdsk, ns.pdsk)) {
@@ -1839,7 +1842,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
* No harm done if the bitmap still changes,
* redirtied pages will follow later. */
drbd_bitmap_io_from_worker(device, &drbd_bm_write,
- "demote diskless peer", BM_LOCKED_SET_ALLOWED);
+ "demote diskless peer", BM_LOCKED_SET_ALLOWED, peer_device);
put_ldev(device);
}
@@ -1851,7 +1854,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
/* No changes to the bitmap expected this time, so assert that,
* even though no harm was done if it did change. */
drbd_bitmap_io_from_worker(device, &drbd_bm_write,
- "demote", BM_LOCKED_TEST_ALLOWED);
+ "demote", BM_LOCKED_TEST_ALLOWED, peer_device);
put_ldev(device);
}
@@ -1888,7 +1891,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
/* no other bitmap changes expected during this phase */
drbd_queue_bitmap_io(device,
&drbd_bmio_set_n_write, &abw_start_sync,
- "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);
+ "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED,
+ peer_device);
/* first half of local IO error, failure to attach,
* or administrative detach */
@@ -2011,7 +2015,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
if ((os.conn > C_CONNECTED && os.conn < C_AHEAD) &&
(ns.conn == C_CONNECTED || ns.conn >= C_AHEAD) && get_ldev(device)) {
drbd_queue_bitmap_io(device, &drbd_bm_write_copy_pages, NULL,
- "write from resync_finished", BM_LOCKED_CHANGE_ALLOWED);
+ "write from resync_finished", BM_LOCKED_CHANGE_ALLOWED,
+ peer_device);
put_ldev(device);
}
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index f46738040d6b..4352a50fbb3f 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -28,8 +28,8 @@
#include "drbd_protocol.h"
#include "drbd_req.h"
-static int make_ov_request(struct drbd_device *, int);
-static int make_resync_request(struct drbd_device *, int);
+static int make_ov_request(struct drbd_peer_device *, int);
+static int make_resync_request(struct drbd_peer_device *, int);
/* endio handlers:
* drbd_md_endio (defined here)
@@ -124,7 +124,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
* In case of a write error, send the neg ack anyways. */
if (!__test_and_set_bit(__EE_SEND_WRITE_ACK, &peer_req->flags))
inc_unacked(device);
- drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
+ drbd_set_out_of_sync(peer_device, peer_req->i.sector, peer_req->i.size);
}
spin_lock_irqsave(&device->resource->req_lock, flags);
@@ -276,7 +276,7 @@ void drbd_request_endio(struct bio *bio)
/* not req_mod(), we need irqsave here! */
spin_lock_irqsave(&device->resource->req_lock, flags);
- __req_mod(req, what, &m);
+ __req_mod(req, what, NULL, &m);
spin_unlock_irqrestore(&device->resource->req_lock, flags);
put_ldev(device);
@@ -363,7 +363,7 @@ static int w_e_send_csum(struct drbd_work *w, int cancel)
* drbd_alloc_pages due to pp_in_use > max_buffers. */
drbd_free_peer_req(device, peer_req);
peer_req = NULL;
- inc_rs_pending(device);
+ inc_rs_pending(peer_device);
err = drbd_send_drequest_csum(peer_device, sector, size,
digest, digest_size,
P_CSUM_RS_REQUEST);
@@ -430,10 +430,10 @@ int w_resync_timer(struct drbd_work *w, int cancel)
switch (device->state.conn) {
case C_VERIFY_S:
- make_ov_request(device, cancel);
+ make_ov_request(first_peer_device(device), cancel);
break;
case C_SYNC_TARGET:
- make_resync_request(device, cancel);
+ make_resync_request(first_peer_device(device), cancel);
break;
}
@@ -493,8 +493,9 @@ struct fifo_buffer *fifo_alloc(unsigned int fifo_size)
return fb;
}
-static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in)
+static int drbd_rs_controller(struct drbd_peer_device *peer_device, unsigned int sect_in)
{
+ struct drbd_device *device = peer_device->device;
struct disk_conf *dc;
unsigned int want; /* The number of sectors we want in-flight */
int req_sect; /* Number of sectors to request in this turn */
@@ -545,8 +546,9 @@ static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in)
return req_sect;
}
-static int drbd_rs_number_requests(struct drbd_device *device)
+static int drbd_rs_number_requests(struct drbd_peer_device *peer_device)
{
+ struct drbd_device *device = peer_device->device;
unsigned int sect_in; /* Number of sectors that came in since the last turn */
int number, mxb;
@@ -556,7 +558,7 @@ static int drbd_rs_number_requests(struct drbd_device *device)
rcu_read_lock();
mxb = drbd_get_max_buffers(device) / 2;
if (rcu_dereference(device->rs_plan_s)->size) {
- number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9);
+ number = drbd_rs_controller(peer_device, sect_in) >> (BM_BLOCK_SHIFT - 9);
device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
} else {
device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
@@ -580,9 +582,9 @@ static int drbd_rs_number_requests(struct drbd_device *device)
return number;
}
-static int make_resync_request(struct drbd_device *const device, int cancel)
+static int make_resync_request(struct drbd_peer_device *const peer_device, int cancel)
{
- struct drbd_peer_device *const peer_device = first_peer_device(device);
+ struct drbd_device *const device = peer_device->device;
struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
unsigned long bit;
sector_t sector;
@@ -598,7 +600,7 @@ static int make_resync_request(struct drbd_device *const device, int cancel)
if (device->rs_total == 0) {
/* empty resync? */
- drbd_resync_finished(device);
+ drbd_resync_finished(peer_device);
return 0;
}
@@ -618,7 +620,7 @@ static int make_resync_request(struct drbd_device *const device, int cancel)
}
max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
- number = drbd_rs_number_requests(device);
+ number = drbd_rs_number_requests(peer_device);
if (number <= 0)
goto requeue;
@@ -653,7 +655,7 @@ next_sector:
sector = BM_BIT_TO_SECT(bit);
- if (drbd_try_rs_begin_io(device, sector)) {
+ if (drbd_try_rs_begin_io(peer_device, sector)) {
device->bm_resync_fo = bit;
goto requeue;
}
@@ -729,13 +731,13 @@ next_sector:
} else {
int err;
- inc_rs_pending(device);
+ inc_rs_pending(peer_device);
err = drbd_send_drequest(peer_device,
size == discard_granularity ? P_RS_THIN_REQ : P_RS_DATA_REQUEST,
sector, size, ID_SYNCER);
if (err) {
drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
- dec_rs_pending(device);
+ dec_rs_pending(peer_device);
put_ldev(device);
return err;
}
@@ -760,8 +762,9 @@ next_sector:
return 0;
}
-static int make_ov_request(struct drbd_device *device, int cancel)
+static int make_ov_request(struct drbd_peer_device *peer_device, int cancel)
{
+ struct drbd_device *device = peer_device->device;
int number, i, size;
sector_t sector;
const sector_t capacity = get_capacity(device->vdisk);
@@ -770,7 +773,7 @@ static int make_ov_request(struct drbd_device *device, int cancel)
if (unlikely(cancel))
return 1;
- number = drbd_rs_number_requests(device);
+ number = drbd_rs_number_requests(peer_device);
sector = device->ov_position;
for (i = 0; i < number; i++) {
@@ -788,7 +791,7 @@ static int make_ov_request(struct drbd_device *device, int cancel)
size = BM_BLOCK_SIZE;
- if (drbd_try_rs_begin_io(device, sector)) {
+ if (drbd_try_rs_begin_io(peer_device, sector)) {
device->ov_position = sector;
goto requeue;
}
@@ -796,9 +799,9 @@ static int make_ov_request(struct drbd_device *device, int cancel)
if (sector + (size>>9) > capacity)
size = (capacity-sector)<<9;
- inc_rs_pending(device);
+ inc_rs_pending(peer_device);
if (drbd_send_ov_request(first_peer_device(device), sector, size)) {
- dec_rs_pending(device);
+ dec_rs_pending(peer_device);
return 0;
}
sector += BM_SECT_PER_BIT;
@@ -818,8 +821,8 @@ int w_ov_finished(struct drbd_work *w, int cancel)
container_of(w, struct drbd_device_work, w);
struct drbd_device *device = dw->device;
kfree(dw);
- ov_out_of_sync_print(device);
- drbd_resync_finished(device);
+ ov_out_of_sync_print(first_peer_device(device));
+ drbd_resync_finished(first_peer_device(device));
return 0;
}
@@ -831,7 +834,7 @@ static int w_resync_finished(struct drbd_work *w, int cancel)
struct drbd_device *device = dw->device;
kfree(dw);
- drbd_resync_finished(device);
+ drbd_resync_finished(first_peer_device(device));
return 0;
}
@@ -846,9 +849,10 @@ static void ping_peer(struct drbd_device *device)
test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED);
}
-int drbd_resync_finished(struct drbd_device *device)
+int drbd_resync_finished(struct drbd_peer_device *peer_device)
{
- struct drbd_connection *connection = first_peer_device(device)->connection;
+ struct drbd_device *device = peer_device->device;
+ struct drbd_connection *connection = peer_device->connection;
unsigned long db, dt, dbdt;
unsigned long n_oos;
union drbd_state os, ns;
@@ -1129,7 +1133,7 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req);
} else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
if (likely(device->state.pdsk >= D_INCONSISTENT)) {
- inc_rs_pending(device);
+ inc_rs_pending(peer_device);
if (peer_req->flags & EE_RS_THIN_REQ && all_zero(peer_req))
err = drbd_send_rs_deallocated(peer_device, peer_req);
else
@@ -1148,7 +1152,7 @@ int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
/* update resync data with failure */
- drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size);
+ drbd_rs_failed_io(peer_device, peer_req->i.sector, peer_req->i.size);
}
dec_unacked(device);
@@ -1199,12 +1203,12 @@ int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
}
if (eq) {
- drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size);
+ drbd_set_in_sync(peer_device, peer_req->i.sector, peer_req->i.size);
/* rs_same_csums unit is BM_BLOCK_SIZE */
device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req);
} else {
- inc_rs_pending(device);
+ inc_rs_pending(peer_device);
peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
kfree(di);
@@ -1257,10 +1261,10 @@ int w_e_end_ov_req(struct drbd_work *w, int cancel)
* drbd_alloc_pages due to pp_in_use > max_buffers. */
drbd_free_peer_req(device, peer_req);
peer_req = NULL;
- inc_rs_pending(device);
+ inc_rs_pending(peer_device);
err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY);
if (err)
- dec_rs_pending(device);
+ dec_rs_pending(peer_device);
kfree(digest);
out:
@@ -1270,15 +1274,16 @@ out:
return err;
}
-void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size)
+void drbd_ov_out_of_sync_found(struct drbd_peer_device *peer_device, sector_t sector, int size)
{
+ struct drbd_device *device = peer_device->device;
if (device->ov_last_oos_start + device->ov_last_oos_size == sector) {
device->ov_last_oos_size += size>>9;
} else {
device->ov_last_oos_start = sector;
device->ov_last_oos_size = size>>9;
}
- drbd_set_out_of_sync(device, sector, size);
+ drbd_set_out_of_sync(peer_device, sector, size);
}
int w_e_end_ov_reply(struct drbd_work *w, int cancel)
@@ -1328,9 +1333,9 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel)
* drbd_alloc_pages due to pp_in_use > max_buffers. */
drbd_free_peer_req(device, peer_req);
if (!eq)
- drbd_ov_out_of_sync_found(device, sector, size);
+ drbd_ov_out_of_sync_found(peer_device, sector, size);
else
- ov_out_of_sync_print(device);
+ ov_out_of_sync_print(peer_device);
err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size,
eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);
@@ -1341,14 +1346,14 @@ int w_e_end_ov_reply(struct drbd_work *w, int cancel)
/* let's advance progress step marks only for every other megabyte */
if ((device->ov_left & 0x200) == 0x200)
- drbd_advance_rs_marks(device, device->ov_left);
+ drbd_advance_rs_marks(peer_device, device->ov_left);
stop_sector_reached = verify_can_do_stop_sector(device) &&
(sector + (size>>9)) >= device->ov_stop_sector;
if (device->ov_left == 0 || stop_sector_reached) {
- ov_out_of_sync_print(device);
- drbd_resync_finished(device);
+ ov_out_of_sync_print(peer_device);
+ drbd_resync_finished(peer_device);
}
return err;
@@ -1425,7 +1430,7 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel)
int err;
if (unlikely(cancel)) {
- req_mod(req, SEND_CANCELED);
+ req_mod(req, SEND_CANCELED, peer_device);
return 0;
}
req->pre_send_jif = jiffies;
@@ -1437,7 +1442,7 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel)
maybe_send_barrier(connection, req->epoch);
err = drbd_send_out_of_sync(peer_device, req);
- req_mod(req, OOS_HANDED_TO_NETWORK);
+ req_mod(req, OOS_HANDED_TO_NETWORK, peer_device);
return err;
}
@@ -1457,7 +1462,7 @@ int w_send_dblock(struct drbd_work *w, int cancel)
int err;
if (unlikely(cancel)) {
- req_mod(req, SEND_CANCELED);
+ req_mod(req, SEND_CANCELED, peer_device);
return 0;
}
req->pre_send_jif = jiffies;
@@ -1467,7 +1472,7 @@ int w_send_dblock(struct drbd_work *w, int cancel)
connection->send.current_epoch_writes++;
err = drbd_send_dblock(peer_device, req);
- req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
+ req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK, peer_device);
if (do_send_unplug && !err)
pd_send_unplug_remote(peer_device);
@@ -1490,7 +1495,7 @@ int w_send_read_req(struct drbd_work *w, int cancel)
int err;
if (unlikely(cancel)) {
- req_mod(req, SEND_CANCELED);
+ req_mod(req, SEND_CANCELED, peer_device);
return 0;
}
req->pre_send_jif = jiffies;
@@ -1502,7 +1507,7 @@ int w_send_read_req(struct drbd_work *w, int cancel)
err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size,
(unsigned long)req);
- req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
+ req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK, peer_device);
if (do_send_unplug && !err)
pd_send_unplug_remote(peer_device);
@@ -1668,8 +1673,9 @@ void drbd_resync_after_changed(struct drbd_device *device)
} while (changed);
}
-void drbd_rs_controller_reset(struct drbd_device *device)
+void drbd_rs_controller_reset(struct drbd_peer_device *peer_device)
{
+ struct drbd_device *device = peer_device->device;
struct gendisk *disk = device->ldev->backing_bdev->bd_disk;
struct fifo_buffer *plan;
@@ -1891,10 +1897,10 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
rcu_read_unlock();
schedule_timeout_interruptible(timeo);
}
- drbd_resync_finished(device);
+ drbd_resync_finished(peer_device);
}
- drbd_rs_controller_reset(device);
+ drbd_rs_controller_reset(peer_device);
/* ns.conn may already be != device->state.conn,
* we may have been paused in between, or become paused until
* the timer triggers.
@@ -1909,8 +1915,9 @@ out:
mutex_unlock(device->state_mutex);
}
-static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done)
+static void update_on_disk_bitmap(struct drbd_peer_device *peer_device, bool resync_done)
{
+ struct drbd_device *device = peer_device->device;
struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
device->rs_last_bcast = jiffies;
@@ -1919,7 +1926,7 @@ static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done)
drbd_bm_write_lazy(device, 0);
if (resync_done && is_sync_state(device->state.conn))
- drbd_resync_finished(device);
+ drbd_resync_finished(peer_device);
drbd_bcast_event(device, &sib);
/* update timestamp, in case it took a while to write out stuff */
@@ -1945,6 +1952,7 @@ static void drbd_ldev_destroy(struct drbd_device *device)
static void go_diskless(struct drbd_device *device)
{
+ struct drbd_peer_device *peer_device = first_peer_device(device);
D_ASSERT(device, device->state.disk == D_FAILED);
/* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
* inc/dec it frequently. Once we are D_DISKLESS, no one will touch
@@ -1970,7 +1978,7 @@ static void go_diskless(struct drbd_device *device)
* Any modifications would not be expected anymore, though.
*/
if (drbd_bitmap_io_from_worker(device, drbd_bm_write,
- "detach", BM_LOCKED_TEST_ALLOWED)) {
+ "detach", BM_LOCKED_TEST_ALLOWED, peer_device)) {
if (test_bit(WAS_READ_ERROR, &device->flags)) {
drbd_md_set_flag(device, MDF_FULL_SYNC);
drbd_md_sync(device);
@@ -2017,7 +2025,7 @@ static void do_device_work(struct drbd_device *device, const unsigned long todo)
do_md_sync(device);
if (test_bit(RS_DONE, &todo) ||
test_bit(RS_PROGRESS, &todo))
- update_on_disk_bitmap(device, test_bit(RS_DONE, &todo));
+ update_on_disk_bitmap(first_peer_device(device), test_bit(RS_DONE, &todo));
if (test_bit(GO_DISKLESS, &todo))
go_diskless(device);
if (test_bit(DESTROY_DISK, &todo))
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 592cfa8b765a..d445fd0934bd 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -325,6 +325,9 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
if (blk_validate_block_size(blksize))
return -EINVAL;
+ if (bytesize < 0)
+ return -EINVAL;
+
nbd->config->bytesize = bytesize;
nbd->config->blksize_bits = __ffs(blksize);
@@ -1111,6 +1114,9 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
struct nbd_sock *nsock;
int err;
+ /* Arg will be cast to int, check it to avoid overflow */
+ if (arg > INT_MAX)
+ return -EINVAL;
sock = nbd_get_socket(nbd, arg, &err);
if (!sock)
return err;
@@ -1934,11 +1940,11 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
}
- if (!info->attrs[NBD_ATTR_SOCKETS]) {
+ if (GENL_REQ_ATTR_CHECK(info, NBD_ATTR_SOCKETS)) {
pr_err("must specify at least one socket\n");
return -EINVAL;
}
- if (!info->attrs[NBD_ATTR_SIZE_BYTES]) {
+ if (GENL_REQ_ATTR_CHECK(info, NBD_ATTR_SIZE_BYTES)) {
pr_err("must specify a size in bytes for the device\n");
return -EINVAL;
}
@@ -2123,7 +2129,7 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
if (!netlink_capable(skb, CAP_SYS_ADMIN))
return -EPERM;
- if (!info->attrs[NBD_ATTR_INDEX]) {
+ if (GENL_REQ_ATTR_CHECK(info, NBD_ATTR_INDEX)) {
pr_err("must specify an index to disconnect\n");
return -EINVAL;
}
@@ -2161,7 +2167,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
if (!netlink_capable(skb, CAP_SYS_ADMIN))
return -EPERM;
- if (!info->attrs[NBD_ATTR_INDEX]) {
+ if (GENL_REQ_ATTR_CHECK(info, NBD_ATTR_INDEX)) {
pr_err("must specify a device to reconfigure\n");
return -EINVAL;
}
@@ -2325,6 +2331,7 @@ static struct genl_family nbd_genl_family __ro_after_init = {
.n_small_ops = ARRAY_SIZE(nbd_connect_genl_ops),
.resv_start_op = NBD_CMD_STATUS + 1,
.maxattr = NBD_ATTR_MAX,
+ .netnsok = 1,
.policy = nbd_attr_policy,
.mcgrps = nbd_mcast_grps,
.n_mcgrps = ARRAY_SIZE(nbd_mcast_grps),
diff --git a/drivers/block/null_blk/Kconfig b/drivers/block/null_blk/Kconfig
index 6bf1f8ca20a2..ff23bb9346d0 100644
--- a/drivers/block/null_blk/Kconfig
+++ b/drivers/block/null_blk/Kconfig
@@ -9,4 +9,4 @@ config BLK_DEV_NULL_BLK
config BLK_DEV_NULL_BLK_FAULT_INJECTION
bool "Support fault injection for Null test block driver"
- depends on BLK_DEV_NULL_BLK && FAULT_INJECTION
+ depends on BLK_DEV_NULL_BLK && FAULT_INJECTION_CONFIGFS
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 9e6b032c8ecc..b195b8b9fe32 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -250,7 +250,7 @@ static void null_free_device_storage(struct nullb_device *dev, bool is_cache);
static inline struct nullb_device *to_nullb_device(struct config_item *item)
{
- return item ? container_of(item, struct nullb_device, item) : NULL;
+ return item ? container_of(to_config_group(item), struct nullb_device, group) : NULL;
}
static inline ssize_t nullb_device_uint_attr_show(unsigned int val, char *page)
@@ -593,8 +593,29 @@ static const struct config_item_type nullb_device_type = {
.ct_owner = THIS_MODULE,
};
+#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
+
+static void nullb_add_fault_config(struct nullb_device *dev)
+{
+ fault_config_init(&dev->timeout_config, "timeout_inject");
+ fault_config_init(&dev->requeue_config, "requeue_inject");
+ fault_config_init(&dev->init_hctx_fault_config, "init_hctx_fault_inject");
+
+ configfs_add_default_group(&dev->timeout_config.group, &dev->group);
+ configfs_add_default_group(&dev->requeue_config.group, &dev->group);
+ configfs_add_default_group(&dev->init_hctx_fault_config.group, &dev->group);
+}
+
+#else
+
+static void nullb_add_fault_config(struct nullb_device *dev)
+{
+}
+
+#endif
+
static struct
-config_item *nullb_group_make_item(struct config_group *group, const char *name)
+config_group *nullb_group_make_group(struct config_group *group, const char *name)
{
struct nullb_device *dev;
@@ -605,9 +626,10 @@ config_item *nullb_group_make_item(struct config_group *group, const char *name)
if (!dev)
return ERR_PTR(-ENOMEM);
- config_item_init_type_name(&dev->item, name, &nullb_device_type);
+ config_group_init_type_name(&dev->group, name, &nullb_device_type);
+ nullb_add_fault_config(dev);
- return &dev->item;
+ return &dev->group;
}
static void
@@ -645,7 +667,7 @@ static struct configfs_attribute *nullb_group_attrs[] = {
};
static struct configfs_group_operations nullb_group_ops = {
- .make_item = nullb_group_make_item,
+ .make_group = nullb_group_make_group,
.drop_item = nullb_group_drop_item,
};
@@ -676,6 +698,13 @@ static struct nullb_device *null_alloc_dev(void)
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev)
return NULL;
+
+#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
+ dev->timeout_config.attr = null_timeout_attr;
+ dev->requeue_config.attr = null_requeue_attr;
+ dev->init_hctx_fault_config.attr = null_init_hctx_attr;
+#endif
+
INIT_RADIX_TREE(&dev->data, GFP_ATOMIC);
INIT_RADIX_TREE(&dev->cache, GFP_ATOMIC);
if (badblocks_init(&dev->badblocks, 0)) {
@@ -1030,8 +1059,8 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
if (!t_page)
return -ENOMEM;
- src = kmap_atomic(c_page->page);
- dst = kmap_atomic(t_page->page);
+ src = kmap_local_page(c_page->page);
+ dst = kmap_local_page(t_page->page);
for (i = 0; i < PAGE_SECTORS;
i += (nullb->dev->blocksize >> SECTOR_SHIFT)) {
@@ -1043,8 +1072,8 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page)
}
}
- kunmap_atomic(dst);
- kunmap_atomic(src);
+ kunmap_local(dst);
+ kunmap_local(src);
ret = radix_tree_delete_item(&nullb->dev->cache, idx, c_page);
null_free_page(ret);
@@ -1112,7 +1141,6 @@ static int copy_to_nullb(struct nullb *nullb, struct page *source,
size_t temp, count = 0;
unsigned int offset;
struct nullb_page *t_page;
- void *dst, *src;
while (count < n) {
temp = min_t(size_t, nullb->dev->blocksize, n - count);
@@ -1126,11 +1154,7 @@ static int copy_to_nullb(struct nullb *nullb, struct page *source,
if (!t_page)
return -ENOSPC;
- src = kmap_atomic(source);
- dst = kmap_atomic(t_page->page);
- memcpy(dst + offset, src + off + count, temp);
- kunmap_atomic(dst);
- kunmap_atomic(src);
+ memcpy_page(t_page->page, offset, source, off + count, temp);
__set_bit(sector & SECTOR_MASK, t_page->bitmap);
@@ -1149,7 +1173,6 @@ static int copy_from_nullb(struct nullb *nullb, struct page *dest,
size_t temp, count = 0;
unsigned int offset;
struct nullb_page *t_page;
- void *dst, *src;
while (count < n) {
temp = min_t(size_t, nullb->dev->blocksize, n - count);
@@ -1158,16 +1181,11 @@ static int copy_from_nullb(struct nullb *nullb, struct page *dest,
t_page = null_lookup_page(nullb, sector, false,
!null_cache_active(nullb));
- dst = kmap_atomic(dest);
- if (!t_page) {
- memset(dst + off + count, 0, temp);
- goto next;
- }
- src = kmap_atomic(t_page->page);
- memcpy(dst + off + count, src + offset, temp);
- kunmap_atomic(src);
-next:
- kunmap_atomic(dst);
+ if (t_page)
+ memcpy_page(dest, off + count, t_page->page, offset,
+ temp);
+ else
+ zero_user(dest, off + count, temp);
count += temp;
sector += temp >> SECTOR_SHIFT;
@@ -1178,11 +1196,7 @@ next:
static void nullb_fill_pattern(struct nullb *nullb, struct page *page,
unsigned int len, unsigned int off)
{
- void *dst;
-
- dst = kmap_atomic(page);
- memset(dst + off, 0xFF, len);
- kunmap_atomic(dst);
+ memset_page(page, off, 0xff, len);
}
blk_status_t null_handle_discard(struct nullb_device *dev,
@@ -1529,24 +1543,48 @@ static void null_submit_bio(struct bio *bio)
null_handle_cmd(alloc_cmd(nq, bio), sector, nr_sectors, bio_op(bio));
}
+#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
+
+static bool should_timeout_request(struct request *rq)
+{
+ struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
+ struct nullb_device *dev = cmd->nq->dev;
+
+ return should_fail(&dev->timeout_config.attr, 1);
+}
+
+static bool should_requeue_request(struct request *rq)
+{
+ struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
+ struct nullb_device *dev = cmd->nq->dev;
+
+ return should_fail(&dev->requeue_config.attr, 1);
+}
+
+static bool should_init_hctx_fail(struct nullb_device *dev)
+{
+ return should_fail(&dev->init_hctx_fault_config.attr, 1);
+}
+
+#else
+
static bool should_timeout_request(struct request *rq)
{
-#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
- if (g_timeout_str[0])
- return should_fail(&null_timeout_attr, 1);
-#endif
return false;
}
static bool should_requeue_request(struct request *rq)
{
-#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
- if (g_requeue_str[0])
- return should_fail(&null_requeue_attr, 1);
-#endif
return false;
}
+static bool should_init_hctx_fail(struct nullb_device *dev)
+{
+ return false;
+}
+
+#endif
+
static void null_map_queues(struct blk_mq_tag_set *set)
{
struct nullb *nullb = set->driver_data;
@@ -1743,10 +1781,8 @@ static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
struct nullb *nullb = hctx->queue->queuedata;
struct nullb_queue *nq;
-#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
- if (g_init_hctx_str[0] && should_fail(&null_init_hctx_attr, 1))
+ if (should_init_hctx_fail(nullb->dev))
return -EFAULT;
-#endif
nq = &nullb->queues[hctx_idx];
hctx->driver_data = nq;
@@ -1964,6 +2000,11 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
static int null_validate_conf(struct nullb_device *dev)
{
+ if (dev->queue_mode == NULL_Q_RQ) {
+ pr_err("legacy IO path is no longer available\n");
+ return -EINVAL;
+ }
+
dev->blocksize = round_down(dev->blocksize, 512);
dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096);
@@ -2066,9 +2107,6 @@ static int null_add_dev(struct nullb_device *dev)
if (rv)
goto out_cleanup_queues;
- if (!null_setup_fault())
- goto out_cleanup_tags;
-
nullb->tag_set->timeout = 5 * HZ;
nullb->disk = blk_mq_alloc_disk(nullb->tag_set, nullb);
if (IS_ERR(nullb->disk)) {
@@ -2130,10 +2168,10 @@ static int null_add_dev(struct nullb_device *dev)
null_config_discard(nullb);
- if (config_item_name(&dev->item)) {
+ if (config_item_name(&dev->group.cg_item)) {
/* Use configfs dir name as the device name */
snprintf(nullb->disk_name, sizeof(nullb->disk_name),
- "%s", config_item_name(&dev->item));
+ "%s", config_item_name(&dev->group.cg_item));
} else {
sprintf(nullb->disk_name, "nullb%d", nullb->index);
}
@@ -2233,6 +2271,9 @@ static int __init null_init(void)
g_home_node = NUMA_NO_NODE;
}
+ if (!null_setup_fault())
+ return -EINVAL;
+
if (g_queue_mode == NULL_Q_RQ) {
pr_err("legacy IO path is no longer available\n");
return -EINVAL;
diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
index eb5972c50be8..929f659dd255 100644
--- a/drivers/block/null_blk/null_blk.h
+++ b/drivers/block/null_blk/null_blk.h
@@ -69,7 +69,12 @@ enum {
struct nullb_device {
struct nullb *nullb;
- struct config_item item;
+ struct config_group group;
+#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
+ struct fault_config timeout_config;
+ struct fault_config requeue_config;
+ struct fault_config init_hctx_fault_config;
+#endif
struct radix_tree_root data; /* data stored in the disk */
struct radix_tree_root cache; /* disk cache data */
unsigned long flags; /* device flags */
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 604c1a13c76e..afbef182820b 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -53,7 +53,8 @@
| UBLK_F_NEED_GET_DATA \
| UBLK_F_USER_RECOVERY \
| UBLK_F_USER_RECOVERY_REISSUE \
- | UBLK_F_UNPRIVILEGED_DEV)
+ | UBLK_F_UNPRIVILEGED_DEV \
+ | UBLK_F_CMD_IOCTL_ENCODE)
/* All UBLK_PARAM_TYPE_* should be included here */
#define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | \
@@ -298,9 +299,7 @@ static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq)
static inline bool ublk_need_get_data(const struct ublk_queue *ubq)
{
- if (ubq->flags & UBLK_F_NEED_GET_DATA)
- return true;
- return false;
+ return ubq->flags & UBLK_F_NEED_GET_DATA;
}
static struct ublk_device *ublk_get_device(struct ublk_device *ub)
@@ -349,25 +348,19 @@ static inline int ublk_queue_cmd_buf_size(struct ublk_device *ub, int q_id)
static inline bool ublk_queue_can_use_recovery_reissue(
struct ublk_queue *ubq)
{
- if ((ubq->flags & UBLK_F_USER_RECOVERY) &&
- (ubq->flags & UBLK_F_USER_RECOVERY_REISSUE))
- return true;
- return false;
+ return (ubq->flags & UBLK_F_USER_RECOVERY) &&
+ (ubq->flags & UBLK_F_USER_RECOVERY_REISSUE);
}
static inline bool ublk_queue_can_use_recovery(
struct ublk_queue *ubq)
{
- if (ubq->flags & UBLK_F_USER_RECOVERY)
- return true;
- return false;
+ return ubq->flags & UBLK_F_USER_RECOVERY;
}
static inline bool ublk_can_use_recovery(struct ublk_device *ub)
{
- if (ub->dev_info.flags & UBLK_F_USER_RECOVERY)
- return true;
- return false;
+ return ub->dev_info.flags & UBLK_F_USER_RECOVERY;
}
static void ublk_free_disk(struct gendisk *disk)
@@ -428,10 +421,9 @@ static const struct block_device_operations ub_fops = {
#define UBLK_MAX_PIN_PAGES 32
struct ublk_map_data {
- const struct ublk_queue *ubq;
const struct request *rq;
- const struct ublk_io *io;
- unsigned max_bytes;
+ unsigned long ubuf;
+ unsigned int len;
};
struct ublk_io_iter {
@@ -488,18 +480,17 @@ static inline unsigned ublk_copy_io_pages(struct ublk_io_iter *data,
return done;
}
-static inline int ublk_copy_user_pages(struct ublk_map_data *data,
- bool to_vm)
+static int ublk_copy_user_pages(struct ublk_map_data *data, bool to_vm)
{
const unsigned int gup_flags = to_vm ? FOLL_WRITE : 0;
- const unsigned long start_vm = data->io->addr;
+ const unsigned long start_vm = data->ubuf;
unsigned int done = 0;
struct ublk_io_iter iter = {
.pg_off = start_vm & (PAGE_SIZE - 1),
.bio = data->rq->bio,
.iter = data->rq->bio->bi_iter,
};
- const unsigned int nr_pages = round_up(data->max_bytes +
+ const unsigned int nr_pages = round_up(data->len +
(start_vm & (PAGE_SIZE - 1)), PAGE_SIZE) >> PAGE_SHIFT;
while (done < nr_pages) {
@@ -512,42 +503,49 @@ static inline int ublk_copy_user_pages(struct ublk_map_data *data,
iter.pages);
if (iter.nr_pages <= 0)
return done == 0 ? iter.nr_pages : done;
- len = ublk_copy_io_pages(&iter, data->max_bytes, to_vm);
+ len = ublk_copy_io_pages(&iter, data->len, to_vm);
for (i = 0; i < iter.nr_pages; i++) {
if (to_vm)
set_page_dirty(iter.pages[i]);
put_page(iter.pages[i]);
}
- data->max_bytes -= len;
+ data->len -= len;
done += iter.nr_pages;
}
return done;
}
+static inline bool ublk_need_map_req(const struct request *req)
+{
+ return ublk_rq_has_data(req) && req_op(req) == REQ_OP_WRITE;
+}
+
+static inline bool ublk_need_unmap_req(const struct request *req)
+{
+ return ublk_rq_has_data(req) && req_op(req) == REQ_OP_READ;
+}
+
static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req,
struct ublk_io *io)
{
const unsigned int rq_bytes = blk_rq_bytes(req);
+
/*
* no zero copy, we delay copy WRITE request data into ublksrv
* context and the big benefit is that pinning pages in current
* context is pretty fast, see ublk_pin_user_pages
*/
- if (req_op(req) != REQ_OP_WRITE && req_op(req) != REQ_OP_FLUSH)
- return rq_bytes;
-
- if (ublk_rq_has_data(req)) {
+ if (ublk_need_map_req(req)) {
struct ublk_map_data data = {
- .ubq = ubq,
.rq = req,
- .io = io,
- .max_bytes = rq_bytes,
+ .ubuf = io->addr,
+ .len = rq_bytes,
};
ublk_copy_user_pages(&data, true);
- return rq_bytes - data.max_bytes;
+ return rq_bytes - data.len;
}
return rq_bytes;
}
@@ -558,19 +556,18 @@ static int ublk_unmap_io(const struct ublk_queue *ubq,
{
const unsigned int rq_bytes = blk_rq_bytes(req);
- if (req_op(req) == REQ_OP_READ && ublk_rq_has_data(req)) {
+ if (ublk_need_unmap_req(req)) {
struct ublk_map_data data = {
- .ubq = ubq,
.rq = req,
- .io = io,
- .max_bytes = io->res,
+ .ubuf = io->addr,
+ .len = io->res,
};
WARN_ON_ONCE(io->res > rq_bytes);
ublk_copy_user_pages(&data, false);
- return io->res - data.max_bytes;
+ return io->res - data.len;
}
return rq_bytes;
}
@@ -655,14 +652,15 @@ static void ublk_complete_rq(struct request *req)
struct ublk_queue *ubq = req->mq_hctx->driver_data;
struct ublk_io *io = &ubq->ios[req->tag];
unsigned int unmapped_bytes;
+ blk_status_t res = BLK_STS_OK;
/* failed read IO if nothing is read */
if (!io->res && req_op(req) == REQ_OP_READ)
io->res = -EIO;
if (io->res < 0) {
- blk_mq_end_request(req, errno_to_blk_status(io->res));
- return;
+ res = errno_to_blk_status(io->res);
+ goto exit;
}
/*
@@ -671,10 +669,8 @@ static void ublk_complete_rq(struct request *req)
*
* Both the two needn't unmap.
*/
- if (req_op(req) != REQ_OP_READ && req_op(req) != REQ_OP_WRITE) {
- blk_mq_end_request(req, BLK_STS_OK);
- return;
- }
+ if (req_op(req) != REQ_OP_READ && req_op(req) != REQ_OP_WRITE)
+ goto exit;
/* for READ request, writing data in iod->addr to rq buffers */
unmapped_bytes = ublk_unmap_io(ubq, req, io);
@@ -691,6 +687,10 @@ static void ublk_complete_rq(struct request *req)
blk_mq_requeue_request(req, true);
else
__blk_mq_end_request(req, BLK_STS_OK);
+
+ return;
+exit:
+ blk_mq_end_request(req, res);
}
/*
@@ -771,9 +771,7 @@ static inline void __ublk_rq_task_work(struct request *req,
return;
}
- if (ublk_need_get_data(ubq) &&
- (req_op(req) == REQ_OP_WRITE ||
- req_op(req) == REQ_OP_FLUSH)) {
+ if (ublk_need_get_data(ubq) && ublk_need_map_req(req)) {
/*
* We have not handled UBLK_IO_NEED_GET_DATA command yet,
* so immepdately pass UBLK_IO_RES_NEED_GET_DATA to ublksrv
@@ -1261,6 +1259,19 @@ static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
ublk_queue_cmd(ubq, req);
}
+static inline int ublk_check_cmd_op(u32 cmd_op)
+{
+ u32 ioc_type = _IOC_TYPE(cmd_op);
+
+ if (IS_ENABLED(CONFIG_BLKDEV_UBLK_LEGACY_OPCODES) && ioc_type != 'u')
+ return -EOPNOTSUPP;
+
+ if (ioc_type != 'u' && ioc_type != 0)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
unsigned int issue_flags,
struct ublksrv_io_cmd *ub_cmd)
@@ -1303,10 +1314,15 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
* iff the driver have set the UBLK_IO_FLAG_NEED_GET_DATA.
*/
if ((!!(io->flags & UBLK_IO_FLAG_NEED_GET_DATA))
- ^ (cmd_op == UBLK_IO_NEED_GET_DATA))
+ ^ (_IOC_NR(cmd_op) == UBLK_IO_NEED_GET_DATA))
+ goto out;
+
+ ret = ublk_check_cmd_op(cmd_op);
+ if (ret)
goto out;
- switch (cmd_op) {
+ ret = -EINVAL;
+ switch (_IOC_NR(cmd_op)) {
case UBLK_IO_FETCH_REQ:
/* UBLK_IO_FETCH_REQ is only allowed before queue is setup */
if (ublk_queue_ready(ubq)) {
@@ -1770,6 +1786,8 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
if (!IS_BUILTIN(CONFIG_BLK_DEV_UBLK))
ub->dev_info.flags |= UBLK_F_URING_CMD_COMP_IN_TASK;
+ ub->dev_info.flags |= UBLK_F_CMD_IOCTL_ENCODE;
+
/* We are not ready to support zero copy */
ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;
@@ -2128,7 +2146,7 @@ static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub,
* know if the specified device is created as unprivileged
* mode.
*/
- if (cmd->cmd_op != UBLK_CMD_GET_DEV_INFO2)
+ if (_IOC_NR(cmd->cmd_op) != UBLK_CMD_GET_DEV_INFO2)
return 0;
}
@@ -2154,7 +2172,7 @@ static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub,
dev_path[header->dev_path_len] = 0;
ret = -EINVAL;
- switch (cmd->cmd_op) {
+ switch (_IOC_NR(cmd->cmd_op)) {
case UBLK_CMD_GET_DEV_INFO:
case UBLK_CMD_GET_DEV_INFO2:
case UBLK_CMD_GET_QUEUE_AFFINITY:
@@ -2193,6 +2211,7 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
{
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
struct ublk_device *ub = NULL;
+ u32 cmd_op = cmd->cmd_op;
int ret = -EINVAL;
if (issue_flags & IO_URING_F_NONBLOCK)
@@ -2203,22 +2222,22 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
if (!(issue_flags & IO_URING_F_SQE128))
goto out;
- if (cmd->cmd_op != UBLK_CMD_ADD_DEV) {
+ ret = ublk_check_cmd_op(cmd_op);
+ if (ret)
+ goto out;
+
+ if (_IOC_NR(cmd_op) != UBLK_CMD_ADD_DEV) {
ret = -ENODEV;
ub = ublk_get_device_from_id(header->dev_id);
if (!ub)
goto out;
ret = ublk_ctrl_uring_cmd_permission(ub, cmd);
- } else {
- /* ADD_DEV permission check is done in command handler */
- ret = 0;
+ if (ret)
+ goto put_dev;
}
- if (ret)
- goto put_dev;
-
- switch (cmd->cmd_op) {
+ switch (_IOC_NR(cmd_op)) {
case UBLK_CMD_START_DEV:
ret = ublk_ctrl_start_dev(ub, cmd);
break;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 2055a758541d..7899f5fb4c13 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1202,21 +1202,12 @@ struct dm_crypto_profile {
struct mapped_device *md;
};
-struct dm_keyslot_evict_args {
- const struct blk_crypto_key *key;
- int err;
-};
-
static int dm_keyslot_evict_callback(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
{
- struct dm_keyslot_evict_args *args = data;
- int err;
+ const struct blk_crypto_key *key = data;
- err = blk_crypto_evict_key(dev->bdev, args->key);
- if (!args->err)
- args->err = err;
- /* Always try to evict the key from all devices. */
+ blk_crypto_evict_key(dev->bdev, key);
return 0;
}
@@ -1229,7 +1220,6 @@ static int dm_keyslot_evict(struct blk_crypto_profile *profile,
{
struct mapped_device *md =
container_of(profile, struct dm_crypto_profile, profile)->md;
- struct dm_keyslot_evict_args args = { key };
struct dm_table *t;
int srcu_idx;
@@ -1242,11 +1232,12 @@ static int dm_keyslot_evict(struct blk_crypto_profile *profile,
if (!ti->type->iterate_devices)
continue;
- ti->type->iterate_devices(ti, dm_keyslot_evict_callback, &args);
+ ti->type->iterate_devices(ti, dm_keyslot_evict_callback,
+ (void *)key);
}
dm_put_live_table(md, srcu_idx);
- return args.err;
+ return 0;
}
static int
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index e7cc6ba1b657..920bb68156d2 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -209,76 +209,99 @@ static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mdde
return NULL;
}
-static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
+static unsigned int optimal_io_size(struct block_device *bdev,
+ unsigned int last_page_size,
+ unsigned int io_size)
+{
+ if (bdev_io_opt(bdev) > bdev_logical_block_size(bdev))
+ return roundup(last_page_size, bdev_io_opt(bdev));
+ return io_size;
+}
+
+static unsigned int bitmap_io_size(unsigned int io_size, unsigned int opt_size,
+ sector_t start, sector_t boundary)
+{
+ if (io_size != opt_size &&
+ start + opt_size / SECTOR_SIZE <= boundary)
+ return opt_size;
+ if (start + io_size / SECTOR_SIZE <= boundary)
+ return io_size;
+
+ /* Overflows boundary */
+ return 0;
+}
+
+static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
+ struct page *page)
{
- struct md_rdev *rdev;
struct block_device *bdev;
struct mddev *mddev = bitmap->mddev;
struct bitmap_storage *store = &bitmap->storage;
+ sector_t offset = mddev->bitmap_info.offset;
+ sector_t ps, sboff, doff;
+ unsigned int size = PAGE_SIZE;
+ unsigned int opt_size = PAGE_SIZE;
+
+ bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
+ if (page->index == store->file_pages - 1) {
+ unsigned int last_page_size = store->bytes & (PAGE_SIZE - 1);
+
+ if (last_page_size == 0)
+ last_page_size = PAGE_SIZE;
+ size = roundup(last_page_size, bdev_logical_block_size(bdev));
+ opt_size = optimal_io_size(bdev, last_page_size, size);
+ }
+
+ ps = page->index * PAGE_SIZE / SECTOR_SIZE;
+ sboff = rdev->sb_start + offset;
+ doff = rdev->data_offset;
+
+ /* Just make sure we aren't corrupting data or metadata */
+ if (mddev->external) {
+ /* Bitmap could be anywhere. */
+ if (sboff + ps > doff &&
+ sboff < (doff + mddev->dev_sectors + PAGE_SIZE / SECTOR_SIZE))
+ return -EINVAL;
+ } else if (offset < 0) {
+ /* DATA BITMAP METADATA */
+ size = bitmap_io_size(size, opt_size, offset + ps, 0);
+ if (size == 0)
+ /* bitmap runs in to metadata */
+ return -EINVAL;
+
+ if (doff + mddev->dev_sectors > sboff)
+ /* data runs in to bitmap */
+ return -EINVAL;
+ } else if (rdev->sb_start < rdev->data_offset) {
+ /* METADATA BITMAP DATA */
+ size = bitmap_io_size(size, opt_size, sboff + ps, doff);
+ if (size == 0)
+ /* bitmap runs in to data */
+ return -EINVAL;
+ } else {
+ /* DATA METADATA BITMAP - no problems */
+ }
-restart:
- rdev = NULL;
- while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
- int size = PAGE_SIZE;
- loff_t offset = mddev->bitmap_info.offset;
+ md_super_write(mddev, rdev, sboff + ps, (int) size, page);
+ return 0;
+}
- bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
+static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
+{
+ struct md_rdev *rdev;
+ struct mddev *mddev = bitmap->mddev;
+ int ret;
- if (page->index == store->file_pages-1) {
- int last_page_size = store->bytes & (PAGE_SIZE-1);
- if (last_page_size == 0)
- last_page_size = PAGE_SIZE;
- size = roundup(last_page_size,
- bdev_logical_block_size(bdev));
- }
- /* Just make sure we aren't corrupting data or
- * metadata
- */
- if (mddev->external) {
- /* Bitmap could be anywhere. */
- if (rdev->sb_start + offset + (page->index
- * (PAGE_SIZE/512))
- > rdev->data_offset
- &&
- rdev->sb_start + offset
- < (rdev->data_offset + mddev->dev_sectors
- + (PAGE_SIZE/512)))
- goto bad_alignment;
- } else if (offset < 0) {
- /* DATA BITMAP METADATA */
- if (offset
- + (long)(page->index * (PAGE_SIZE/512))
- + size/512 > 0)
- /* bitmap runs in to metadata */
- goto bad_alignment;
- if (rdev->data_offset + mddev->dev_sectors
- > rdev->sb_start + offset)
- /* data runs in to bitmap */
- goto bad_alignment;
- } else if (rdev->sb_start < rdev->data_offset) {
- /* METADATA BITMAP DATA */
- if (rdev->sb_start
- + offset
- + page->index*(PAGE_SIZE/512) + size/512
- > rdev->data_offset)
- /* bitmap runs in to data */
- goto bad_alignment;
- } else {
- /* DATA METADATA BITMAP - no problems */
+ do {
+ rdev = NULL;
+ while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
+ ret = __write_sb_page(rdev, bitmap, page);
+ if (ret)
+ return ret;
}
- md_super_write(mddev, rdev,
- rdev->sb_start + offset
- + page->index * (PAGE_SIZE/512),
- size,
- page);
- }
+ } while (wait && md_super_wait(mddev) < 0);
- if (wait && md_super_wait(mddev) < 0)
- goto restart;
return 0;
-
- bad_alignment:
- return -EINVAL;
}
static void md_bitmap_file_kick(struct bitmap *bitmap);
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index 6e7797b4e738..4eb72b9dd933 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -223,7 +223,8 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
bio_sector < start_sector))
goto out_of_bounds;
- if (unlikely(is_mddev_broken(tmp_dev->rdev, "linear"))) {
+ if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
+ md_error(mddev, tmp_dev->rdev);
bio_io_error(bio);
return true;
}
@@ -270,6 +271,16 @@ static void linear_status (struct seq_file *seq, struct mddev *mddev)
seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
}
+static void linear_error(struct mddev *mddev, struct md_rdev *rdev)
+{
+ if (!test_and_set_bit(MD_BROKEN, &mddev->flags)) {
+ char *md_name = mdname(mddev);
+
+ pr_crit("md/linear%s: Disk failure on %pg detected, failing array.\n",
+ md_name, rdev->bdev);
+ }
+}
+
static void linear_quiesce(struct mddev *mddev, int state)
{
}
@@ -286,6 +297,7 @@ static struct md_personality linear_personality =
.hot_add_disk = linear_add,
.size = linear_size,
.quiesce = linear_quiesce,
+ .error_handler = linear_error,
};
static int __init linear_init (void)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 13321dbb5fbc..2003cf86fb8d 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -78,7 +78,7 @@
static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);
-static struct kobj_type md_ktype;
+static const struct kobj_type md_ktype;
struct md_cluster_operations *md_cluster_ops;
EXPORT_SYMBOL(md_cluster_ops);
@@ -3600,7 +3600,7 @@ static const struct sysfs_ops rdev_sysfs_ops = {
.show = rdev_attr_show,
.store = rdev_attr_store,
};
-static struct kobj_type rdev_ktype = {
+static const struct kobj_type rdev_ktype = {
.release = rdev_free,
.sysfs_ops = &rdev_sysfs_ops,
.default_groups = rdev_default_groups,
@@ -5558,7 +5558,7 @@ static const struct sysfs_ops md_sysfs_ops = {
.show = md_attr_show,
.store = md_attr_store,
};
-static struct kobj_type md_ktype = {
+static const struct kobj_type md_ktype = {
.release = md_kobj_release,
.sysfs_ops = &md_sysfs_ops,
.default_groups = md_attr_groups,
@@ -7974,6 +7974,9 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
return;
mddev->pers->error_handler(mddev, rdev);
+ if (mddev->pers->level == 0 || mddev->pers->level == LEVEL_LINEAR)
+ return;
+
if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
sysfs_notify_dirent_safe(rdev->sysfs_state);
@@ -8029,16 +8032,16 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev)
} else if (resync > max_sectors) {
resync = max_sectors;
} else {
- resync -= atomic_read(&mddev->recovery_active);
- if (resync < MD_RESYNC_ACTIVE) {
- /*
- * Resync has started, but the subtraction has
- * yielded one of the special values. Force it
- * to active to ensure the status reports an
- * active resync.
- */
+ res = atomic_read(&mddev->recovery_active);
+ /*
+ * Resync has started, but the subtraction has overflowed or
+ * yielded one of the special values. Force it to active to
+ * ensure the status reports an active resync.
+ */
+ if (resync < res || resync - res < MD_RESYNC_ACTIVE)
resync = MD_RESYNC_ACTIVE;
- }
+ else
+ resync -= res;
}
if (resync == MD_RESYNC_NONE) {
diff --git a/drivers/md/md.h b/drivers/md/md.h
index e148e3c83b0d..fd8f260ed5f8 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -790,15 +790,9 @@ extern void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
-static inline bool is_mddev_broken(struct md_rdev *rdev, const char *md_type)
+static inline bool is_rdev_broken(struct md_rdev *rdev)
{
- if (!disk_live(rdev->bdev->bd_disk)) {
- if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags))
- pr_warn("md: %s: %s array has a missing/failed member\n",
- mdname(rdev->mddev), md_type);
- return true;
- }
- return false;
+ return !disk_live(rdev->bdev->bd_disk);
}
static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index b536befd8898..f8ee9a95e25d 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -569,8 +569,9 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
return true;
}
- if (unlikely(is_mddev_broken(tmp_dev, "raid0"))) {
+ if (unlikely(is_rdev_broken(tmp_dev))) {
bio_io_error(bio);
+ md_error(mddev, tmp_dev);
return true;
}
@@ -592,6 +593,16 @@ static void raid0_status(struct seq_file *seq, struct mddev *mddev)
return;
}
+static void raid0_error(struct mddev *mddev, struct md_rdev *rdev)
+{
+ if (!test_and_set_bit(MD_BROKEN, &mddev->flags)) {
+ char *md_name = mdname(mddev);
+
+ pr_crit("md/raid0%s: Disk failure on %pg detected, failing array.\n",
+ md_name, rdev->bdev);
+ }
+}
+
static void *raid0_takeover_raid45(struct mddev *mddev)
{
struct md_rdev *rdev;
@@ -767,6 +778,7 @@ static struct md_personality raid0_personality=
.size = raid0_size,
.takeover = raid0_takeover,
.quiesce = raid0_quiesce,
+ .error_handler = raid0_error,
};
static int __init raid0_init (void)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 6c66357f92f5..4fcfcb350d2b 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -952,7 +952,9 @@ static void flush_pending_writes(struct r10conf *conf)
static void raise_barrier(struct r10conf *conf, int force)
{
write_seqlock_irq(&conf->resync_lock);
- BUG_ON(force && !conf->barrier);
+
+ if (WARN_ON_ONCE(force && !conf->barrier))
+ force = false;
/* Wait until no block IO is waiting (unless 'force') */
wait_event_barrier(conf, force || !conf->nr_waiting);
@@ -995,11 +997,15 @@ static bool stop_waiting_barrier(struct r10conf *conf)
(!bio_list_empty(&bio_list[0]) || !bio_list_empty(&bio_list[1])))
return true;
- /* move on if recovery thread is blocked by us */
- if (conf->mddev->thread->tsk == current &&
- test_bit(MD_RECOVERY_RUNNING, &conf->mddev->recovery) &&
- conf->nr_queued > 0)
+ /*
+ * move on if io is issued from raid10d(), nr_pending is not released
+ * from original io(see handle_read_error()). All raise barrier is
+ * blocked until this io is done.
+ */
+ if (conf->mddev->thread->tsk == current) {
+ WARN_ON_ONCE(atomic_read(&conf->nr_pending) == 0);
return true;
+ }
return false;
}
@@ -1244,7 +1250,8 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
}
slot = r10_bio->read_slot;
- if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
+ if (!r10_bio->start_time &&
+ blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
r10_bio->start_time = bio_start_io_acct(bio);
read_bio = bio_alloc_clone(rdev->bdev, bio, gfp, &mddev->bio_set);
@@ -1574,6 +1581,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
r10_bio->sector = bio->bi_iter.bi_sector;
r10_bio->state = 0;
r10_bio->read_slot = -1;
+ r10_bio->start_time = 0;
memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) *
conf->geo.raid_disks);
@@ -1626,7 +1634,7 @@ static void raid10_end_discard_request(struct bio *bio)
/*
* raid10_remove_disk uses smp_mb to make sure rdev is set to
* replacement before setting replacement to NULL. It can read
- * rdev first without barrier protect even replacment is NULL
+ * rdev first without barrier protect even replacement is NULL
*/
smp_rmb();
rdev = conf->mirrors[dev].rdev;
@@ -2609,11 +2617,22 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
{
struct r10conf *conf = mddev->private;
int d;
- struct bio *wbio, *wbio2;
+ struct bio *wbio = r10_bio->devs[1].bio;
+ struct bio *wbio2 = r10_bio->devs[1].repl_bio;
+
+ /* Need to test wbio2->bi_end_io before we call
+ * submit_bio_noacct as if the former is NULL,
+ * the latter is free to free wbio2.
+ */
+ if (wbio2 && !wbio2->bi_end_io)
+ wbio2 = NULL;
if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) {
fix_recovery_read_error(r10_bio);
- end_sync_request(r10_bio);
+ if (wbio->bi_end_io)
+ end_sync_request(r10_bio);
+ if (wbio2)
+ end_sync_request(r10_bio);
return;
}
@@ -2622,14 +2641,6 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
* and submit the write request
*/
d = r10_bio->devs[1].devnum;
- wbio = r10_bio->devs[1].bio;
- wbio2 = r10_bio->devs[1].repl_bio;
- /* Need to test wbio2->bi_end_io before we call
- * submit_bio_noacct as if the former is NULL,
- * the latter is free to free wbio2.
- */
- if (wbio2 && !wbio2->bi_end_io)
- wbio2 = NULL;
if (wbio->bi_end_io) {
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio));
@@ -2978,9 +2989,13 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
md_error(mddev, rdev);
rdev_dec_pending(rdev, mddev);
- allow_barrier(conf);
r10_bio->state = 0;
raid10_read_request(mddev, r10_bio->master_bio, r10_bio);
+ /*
+ * allow_barrier after re-submit to ensure no sync io
+ * can be issued while regular io pending.
+ */
+ allow_barrier(conf);
}
static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
@@ -3289,10 +3304,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
sector_t chunk_mask = conf->geo.chunk_mask;
int page_idx = 0;
- if (!mempool_initialized(&conf->r10buf_pool))
- if (init_resync(conf))
- return 0;
-
/*
* Allow skipping a full rebuild for incremental assembly
* of a clean array, like RAID1 does.
@@ -3308,6 +3319,10 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
return mddev->dev_sectors - sector_nr;
}
+ if (!mempool_initialized(&conf->r10buf_pool))
+ if (init_resync(conf))
+ return 0;
+
skipped:
max_sector = mddev->dev_sectors;
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
@@ -4004,6 +4019,20 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new)
return nc*fc;
}
+static void raid10_free_conf(struct r10conf *conf)
+{
+ if (!conf)
+ return;
+
+ mempool_exit(&conf->r10bio_pool);
+ kfree(conf->mirrors);
+ kfree(conf->mirrors_old);
+ kfree(conf->mirrors_new);
+ safe_put_page(conf->tmppage);
+ bioset_exit(&conf->bio_split);
+ kfree(conf);
+}
+
static struct r10conf *setup_conf(struct mddev *mddev)
{
struct r10conf *conf = NULL;
@@ -4086,13 +4115,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
return conf;
out:
- if (conf) {
- mempool_exit(&conf->r10bio_pool);
- kfree(conf->mirrors);
- safe_put_page(conf->tmppage);
- bioset_exit(&conf->bio_split);
- kfree(conf);
- }
+ raid10_free_conf(conf);
return ERR_PTR(err);
}
@@ -4129,6 +4152,9 @@ static int raid10_run(struct mddev *mddev)
if (!conf)
goto out;
+ mddev->thread = conf->thread;
+ conf->thread = NULL;
+
if (mddev_is_clustered(conf->mddev)) {
int fc, fo;
@@ -4141,9 +4167,6 @@ static int raid10_run(struct mddev *mddev)
}
}
- mddev->thread = conf->thread;
- conf->thread = NULL;
-
if (mddev->queue) {
blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
@@ -4283,10 +4306,7 @@ static int raid10_run(struct mddev *mddev)
out_free_conf:
md_unregister_thread(&mddev->thread);
- mempool_exit(&conf->r10bio_pool);
- safe_put_page(conf->tmppage);
- kfree(conf->mirrors);
- kfree(conf);
+ raid10_free_conf(conf);
mddev->private = NULL;
out:
return -EIO;
@@ -4294,15 +4314,7 @@ out:
static void raid10_free(struct mddev *mddev, void *priv)
{
- struct r10conf *conf = priv;
-
- mempool_exit(&conf->r10bio_pool);
- safe_put_page(conf->tmppage);
- kfree(conf->mirrors);
- kfree(conf->mirrors_old);
- kfree(conf->mirrors_new);
- bioset_exit(&conf->bio_split);
- kfree(conf);
+ raid10_free_conf(priv);
}
static void raid10_quiesce(struct mddev *mddev, int quiesce)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 7b820b81d8c2..812a12e3e41a 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7716,7 +7716,6 @@ static void raid5_set_io_opt(struct r5conf *conf)
static int raid5_run(struct mddev *mddev)
{
struct r5conf *conf;
- int working_disks = 0;
int dirty_parity_disks = 0;
struct md_rdev *rdev;
struct md_rdev *journal_dev = NULL;
@@ -7912,10 +7911,8 @@ static int raid5_run(struct mddev *mddev)
pr_warn("md: cannot handle concurrent replacement and reshape.\n");
goto abort;
}
- if (test_bit(In_sync, &rdev->flags)) {
- working_disks++;
+ if (test_bit(In_sync, &rdev->flags))
continue;
- }
/* This disc is not fully in-sync. However if it
* just stored parity (beyond the recovery_offset),
* when we don't need to be concerned about the
diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index b317ce6c4ec3..596bb11eeba5 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -209,16 +209,16 @@ static inline struct apple_nvme *queue_to_apple_nvme(struct apple_nvme_queue *q)
{
if (q->is_adminq)
return container_of(q, struct apple_nvme, adminq);
- else
- return container_of(q, struct apple_nvme, ioq);
+
+ return container_of(q, struct apple_nvme, ioq);
}
static unsigned int apple_nvme_queue_depth(struct apple_nvme_queue *q)
{
if (q->is_adminq)
return APPLE_NVME_AQ_DEPTH;
- else
- return APPLE_ANS_MAX_QUEUE_DEPTH;
+
+ return APPLE_ANS_MAX_QUEUE_DEPTH;
}
static void apple_nvme_rtkit_crashed(void *cookie)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index d6a9bac91a4c..1bfd52eae2ee 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -450,8 +450,8 @@ bool nvme_cancel_request(struct request *req, void *data)
dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device,
"Cancelling I/O %d", req->tag);
- /* don't abort one completed request */
- if (blk_mq_request_completed(req))
+ /* don't abort one completed or idle request */
+ if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT)
return true;
nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD;
@@ -4819,8 +4819,6 @@ static bool nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
u32 aer_notice_type = nvme_aer_subtype(result);
bool requeue = true;
- trace_nvme_async_event(ctrl, aer_notice_type);
-
switch (aer_notice_type) {
case NVME_AER_NOTICE_NS_CHANGED:
set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events);
@@ -4856,7 +4854,6 @@ static bool nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
static void nvme_handle_aer_persistent_error(struct nvme_ctrl *ctrl)
{
- trace_nvme_async_event(ctrl, NVME_AER_ERROR);
dev_warn(ctrl->device, "resetting controller due to AER\n");
nvme_reset_ctrl(ctrl);
}
@@ -4872,6 +4869,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
if (le16_to_cpu(status) >> 1 != NVME_SC_SUCCESS)
return;
+ trace_nvme_async_event(ctrl, result);
switch (aer_type) {
case NVME_AER_NOTICE:
requeue = nvme_handle_aen_notice(ctrl, result);
@@ -4889,7 +4887,6 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
case NVME_AER_SMART:
case NVME_AER_CSS:
case NVME_AER_VS:
- trace_nvme_async_event(ctrl, aer_type);
ctrl->aen_result = result;
break;
default:
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index cd7873de3121..7f25c0fe3a0b 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -5,7 +5,6 @@
*/
#include <linux/acpi.h>
-#include <linux/aer.h>
#include <linux/async.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
@@ -2535,7 +2534,6 @@ static int nvme_pci_enable(struct nvme_dev *dev)
nvme_map_cmb(dev);
- pci_enable_pcie_error_reporting(pdev);
pci_save_state(pdev);
result = nvme_pci_configure_admin_queue(dev);
@@ -2600,10 +2598,8 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
nvme_suspend_io_queues(dev);
nvme_suspend_queue(dev, 0);
pci_free_irq_vectors(pdev);
- if (pci_is_enabled(pdev)) {
- pci_disable_pcie_error_reporting(pdev);
+ if (pci_is_enabled(pdev))
pci_disable_device(pdev);
- }
nvme_reap_pending_cqes(dev);
nvme_cancel_tagset(&dev->ctrl);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index bbad26b82b56..0eb79696fb73 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -12,7 +12,6 @@
#include <linux/string.h>
#include <linux/atomic.h>
#include <linux/blk-mq.h>
-#include <linux/blk-mq-rdma.h>
#include <linux/blk-integrity.h>
#include <linux/types.h>
#include <linux/list.h>
@@ -464,7 +463,6 @@ static int nvme_rdma_create_cq(struct ib_device *ibdev,
struct nvme_rdma_queue *queue)
{
int ret, comp_vector, idx = nvme_rdma_queue_idx(queue);
- enum ib_poll_context poll_ctx;
/*
* Spread I/O queues completion vectors according their queue index.
@@ -473,15 +471,12 @@ static int nvme_rdma_create_cq(struct ib_device *ibdev,
comp_vector = (idx == 0 ? idx : idx - 1) % ibdev->num_comp_vectors;
/* Polling queues need direct cq polling context */
- if (nvme_rdma_poll_queue(queue)) {
- poll_ctx = IB_POLL_DIRECT;
+ if (nvme_rdma_poll_queue(queue))
queue->ib_cq = ib_alloc_cq(ibdev, queue, queue->cq_size,
- comp_vector, poll_ctx);
- } else {
- poll_ctx = IB_POLL_SOFTIRQ;
+ comp_vector, IB_POLL_DIRECT);
+ else
queue->ib_cq = ib_cq_pool_get(ibdev, queue->cq_size,
- comp_vector, poll_ctx);
- }
+ comp_vector, IB_POLL_SOFTIRQ);
if (IS_ERR(queue->ib_cq)) {
ret = PTR_ERR(queue->ib_cq);
@@ -2163,10 +2158,8 @@ static void nvme_rdma_map_queues(struct blk_mq_tag_set *set)
ctrl->io_queues[HCTX_TYPE_DEFAULT];
set->map[HCTX_TYPE_READ].queue_offset = 0;
}
- blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_DEFAULT],
- ctrl->device->dev, 0);
- blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_READ],
- ctrl->device->dev, 0);
+ blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
+ blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) {
/* map dedicated poll queues only if we have queues left */
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 49c9e7bc9116..bf0230442d57 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -888,6 +888,9 @@ static int nvme_tcp_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
size_t consumed = len;
int result;
+ if (unlikely(!queue->rd_enabled))
+ return -EFAULT;
+
while (len) {
switch (nvme_tcp_recv_state(queue)) {
case NVME_TCP_RECV_PDU:
diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h
index 6f0eaf6a1528..4fb5922ffdac 100644
--- a/drivers/nvme/host/trace.h
+++ b/drivers/nvme/host/trace.h
@@ -127,15 +127,12 @@ TRACE_EVENT(nvme_async_event,
),
TP_printk("nvme%d: NVME_AEN=%#08x [%s]",
__entry->ctrl_id, __entry->result,
- __print_symbolic(__entry->result,
- aer_name(NVME_AER_NOTICE_NS_CHANGED),
- aer_name(NVME_AER_NOTICE_ANA),
- aer_name(NVME_AER_NOTICE_FW_ACT_STARTING),
- aer_name(NVME_AER_NOTICE_DISC_CHANGED),
- aer_name(NVME_AER_ERROR),
- aer_name(NVME_AER_SMART),
- aer_name(NVME_AER_CSS),
- aer_name(NVME_AER_VS))
+ __print_symbolic(__entry->result & 0x7,
+ aer_name(NVME_AER_ERROR),
+ aer_name(NVME_AER_SMART),
+ aer_name(NVME_AER_NOTICE),
+ aer_name(NVME_AER_CSS),
+ aer_name(NVME_AER_VS))
)
);
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 80099df37314..39cb570f833d 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -668,21 +668,11 @@ out:
nvmet_req_complete(req, status);
}
-static bool nvmet_handle_identify_desclist(struct nvmet_req *req)
+static void nvmet_execute_identify_ctrl_nvm(struct nvmet_req *req)
{
- switch (req->cmd->identify.csi) {
- case NVME_CSI_NVM:
- nvmet_execute_identify_desclist(req);
- return true;
- case NVME_CSI_ZNS:
- if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
- nvmet_execute_identify_desclist(req);
- return true;
- }
- return false;
- default:
- return false;
- }
+ /* Not supported: return zeroes */
+ nvmet_req_complete(req,
+ nvmet_zero_sgl(req, 0, sizeof(struct nvme_id_ctrl_nvm)));
}
static void nvmet_execute_identify(struct nvmet_req *req)
@@ -692,54 +682,49 @@ static void nvmet_execute_identify(struct nvmet_req *req)
switch (req->cmd->identify.cns) {
case NVME_ID_CNS_NS:
+ nvmet_execute_identify_ns(req);
+ return;
+ case NVME_ID_CNS_CTRL:
+ nvmet_execute_identify_ctrl(req);
+ return;
+ case NVME_ID_CNS_NS_ACTIVE_LIST:
+ nvmet_execute_identify_nslist(req);
+ return;
+ case NVME_ID_CNS_NS_DESC_LIST:
+ nvmet_execute_identify_desclist(req);
+ return;
+ case NVME_ID_CNS_CS_NS:
switch (req->cmd->identify.csi) {
case NVME_CSI_NVM:
- return nvmet_execute_identify_ns(req);
- default:
+ /* Not supported */
break;
- }
- break;
- case NVME_ID_CNS_CS_NS:
- if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
- switch (req->cmd->identify.csi) {
- case NVME_CSI_ZNS:
- return nvmet_execute_identify_cns_cs_ns(req);
- default:
- break;
+ case NVME_CSI_ZNS:
+ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
+ nvmet_execute_identify_ns_zns(req);
+ return;
}
- }
- break;
- case NVME_ID_CNS_CTRL:
- switch (req->cmd->identify.csi) {
- case NVME_CSI_NVM:
- return nvmet_execute_identify_ctrl(req);
+ break;
}
break;
case NVME_ID_CNS_CS_CTRL:
- if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
- switch (req->cmd->identify.csi) {
- case NVME_CSI_ZNS:
- return nvmet_execute_identify_cns_cs_ctrl(req);
- default:
- break;
- }
- }
- break;
- case NVME_ID_CNS_NS_ACTIVE_LIST:
switch (req->cmd->identify.csi) {
case NVME_CSI_NVM:
- return nvmet_execute_identify_nslist(req);
- default:
+ nvmet_execute_identify_ctrl_nvm(req);
+ return;
+ case NVME_CSI_ZNS:
+ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
+ nvmet_execute_identify_ctrl_zns(req);
+ return;
+ }
break;
}
break;
- case NVME_ID_CNS_NS_DESC_LIST:
- if (nvmet_handle_identify_desclist(req) == true)
- return;
- break;
}
- nvmet_req_cns_error_complete(req);
+ pr_debug("unhandled identify cns %d on qid %d\n",
+ req->cmd->identify.cns, req->sq->qid);
+ req->error_loc = offsetof(struct nvme_identify, cns);
+ nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
}
/*
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 5c16372f3b53..c780af36c1d4 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -614,10 +614,11 @@ fcloop_fcp_recv_work(struct work_struct *work)
struct fcloop_fcpreq *tfcp_req =
container_of(work, struct fcloop_fcpreq, fcp_rcv_work);
struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq;
+ unsigned long flags;
int ret = 0;
bool aborted = false;
- spin_lock_irq(&tfcp_req->reqlock);
+ spin_lock_irqsave(&tfcp_req->reqlock, flags);
switch (tfcp_req->inistate) {
case INI_IO_START:
tfcp_req->inistate = INI_IO_ACTIVE;
@@ -626,11 +627,11 @@ fcloop_fcp_recv_work(struct work_struct *work)
aborted = true;
break;
default:
- spin_unlock_irq(&tfcp_req->reqlock);
+ spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
WARN_ON(1);
return;
}
- spin_unlock_irq(&tfcp_req->reqlock);
+ spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
if (unlikely(aborted))
ret = -ECANCELED;
@@ -655,8 +656,9 @@ fcloop_fcp_abort_recv_work(struct work_struct *work)
container_of(work, struct fcloop_fcpreq, abort_rcv_work);
struct nvmefc_fcp_req *fcpreq;
bool completed = false;
+ unsigned long flags;
- spin_lock_irq(&tfcp_req->reqlock);
+ spin_lock_irqsave(&tfcp_req->reqlock, flags);
fcpreq = tfcp_req->fcpreq;
switch (tfcp_req->inistate) {
case INI_IO_ABORTED:
@@ -665,11 +667,11 @@ fcloop_fcp_abort_recv_work(struct work_struct *work)
completed = true;
break;
default:
- spin_unlock_irq(&tfcp_req->reqlock);
+ spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
WARN_ON(1);
return;
}
- spin_unlock_irq(&tfcp_req->reqlock);
+ spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
if (unlikely(completed)) {
/* remove reference taken in original abort downcall */
@@ -681,9 +683,9 @@ fcloop_fcp_abort_recv_work(struct work_struct *work)
nvmet_fc_rcv_fcp_abort(tfcp_req->tport->targetport,
&tfcp_req->tgt_fcp_req);
- spin_lock_irq(&tfcp_req->reqlock);
+ spin_lock_irqsave(&tfcp_req->reqlock, flags);
tfcp_req->fcpreq = NULL;
- spin_unlock_irq(&tfcp_req->reqlock);
+ spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
fcloop_call_host_done(fcpreq, tfcp_req, -ECANCELED);
/* call_host_done releases reference for abort downcall */
@@ -699,11 +701,12 @@ fcloop_tgt_fcprqst_done_work(struct work_struct *work)
struct fcloop_fcpreq *tfcp_req =
container_of(work, struct fcloop_fcpreq, tio_done_work);
struct nvmefc_fcp_req *fcpreq;
+ unsigned long flags;
- spin_lock_irq(&tfcp_req->reqlock);
+ spin_lock_irqsave(&tfcp_req->reqlock, flags);
fcpreq = tfcp_req->fcpreq;
tfcp_req->inistate = INI_IO_COMPLETED;
- spin_unlock_irq(&tfcp_req->reqlock);
+ spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
fcloop_call_host_done(fcpreq, tfcp_req, tfcp_req->status);
}
@@ -807,13 +810,14 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport,
u32 rsplen = 0, xfrlen = 0;
int fcp_err = 0, active, aborted;
u8 op = tgt_fcpreq->op;
+ unsigned long flags;
- spin_lock_irq(&tfcp_req->reqlock);
+ spin_lock_irqsave(&tfcp_req->reqlock, flags);
fcpreq = tfcp_req->fcpreq;
active = tfcp_req->active;
aborted = tfcp_req->aborted;
tfcp_req->active = true;
- spin_unlock_irq(&tfcp_req->reqlock);
+ spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
if (unlikely(active))
/* illegal - call while i/o active */
@@ -821,9 +825,9 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport,
if (unlikely(aborted)) {
/* target transport has aborted i/o prior */
- spin_lock_irq(&tfcp_req->reqlock);
+ spin_lock_irqsave(&tfcp_req->reqlock, flags);
tfcp_req->active = false;
- spin_unlock_irq(&tfcp_req->reqlock);
+ spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
tgt_fcpreq->transferred_length = 0;
tgt_fcpreq->fcp_error = -ECANCELED;
tgt_fcpreq->done(tgt_fcpreq);
@@ -880,9 +884,9 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport,
break;
}
- spin_lock_irq(&tfcp_req->reqlock);
+ spin_lock_irqsave(&tfcp_req->reqlock, flags);
tfcp_req->active = false;
- spin_unlock_irq(&tfcp_req->reqlock);
+ spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
tgt_fcpreq->transferred_length = xfrlen;
tgt_fcpreq->fcp_error = fcp_err;
@@ -896,15 +900,16 @@ fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport,
struct nvmefc_tgt_fcp_req *tgt_fcpreq)
{
struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq);
+ unsigned long flags;
/*
* mark aborted only in case there were 2 threads in transport
* (one doing io, other doing abort) and only kills ops posted
* after the abort request
*/
- spin_lock_irq(&tfcp_req->reqlock);
+ spin_lock_irqsave(&tfcp_req->reqlock, flags);
tfcp_req->aborted = true;
- spin_unlock_irq(&tfcp_req->reqlock);
+ spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
tfcp_req->status = NVME_SC_INTERNAL;
@@ -946,6 +951,7 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport,
struct fcloop_ini_fcpreq *inireq = fcpreq->private;
struct fcloop_fcpreq *tfcp_req;
bool abortio = true;
+ unsigned long flags;
spin_lock(&inireq->inilock);
tfcp_req = inireq->tfcp_req;
@@ -958,7 +964,7 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport,
return;
/* break initiator/target relationship for io */
- spin_lock_irq(&tfcp_req->reqlock);
+ spin_lock_irqsave(&tfcp_req->reqlock, flags);
switch (tfcp_req->inistate) {
case INI_IO_START:
case INI_IO_ACTIVE:
@@ -968,11 +974,11 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport,
abortio = false;
break;
default:
- spin_unlock_irq(&tfcp_req->reqlock);
+ spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
WARN_ON(1);
return;
}
- spin_unlock_irq(&tfcp_req->reqlock);
+ spin_unlock_irqrestore(&tfcp_req->reqlock, flags);
if (abortio)
/* leave the reference while the work item is scheduled */
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 89bedfcd974c..dc60a22646f7 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -581,8 +581,8 @@ bool nvmet_ns_revalidate(struct nvmet_ns *ns);
u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts);
bool nvmet_bdev_zns_enable(struct nvmet_ns *ns);
-void nvmet_execute_identify_cns_cs_ctrl(struct nvmet_req *req);
-void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req);
+void nvmet_execute_identify_ctrl_zns(struct nvmet_req *req);
+void nvmet_execute_identify_ns_zns(struct nvmet_req *req);
void nvmet_bdev_execute_zone_mgmt_recv(struct nvmet_req *req);
void nvmet_bdev_execute_zone_mgmt_send(struct nvmet_req *req);
void nvmet_bdev_execute_zone_append(struct nvmet_req *req);
@@ -687,14 +687,6 @@ static inline bool nvmet_use_inline_bvec(struct nvmet_req *req)
req->sg_cnt <= NVMET_MAX_INLINE_BIOVEC;
}
-static inline void nvmet_req_cns_error_complete(struct nvmet_req *req)
-{
- pr_debug("unhandled identify cns %d on qid %d\n",
- req->cmd->identify.cns, req->sq->qid);
- req->error_loc = offsetof(struct nvme_identify, cns);
- nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
-}
-
static inline void nvmet_req_bio_put(struct nvmet_req *req, struct bio *bio)
{
if (bio != &req->b.inline_bio)
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 66e8f9fd0ca7..ed98df72c76b 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -20,6 +20,31 @@
#define NVMET_TCP_DEF_INLINE_DATA_SIZE (4 * PAGE_SIZE)
+static int param_store_val(const char *str, int *val, int min, int max)
+{
+ int ret, new_val;
+
+ ret = kstrtoint(str, 10, &new_val);
+ if (ret)
+ return -EINVAL;
+
+ if (new_val < min || new_val > max)
+ return -EINVAL;
+
+ *val = new_val;
+ return 0;
+}
+
+static int set_params(const char *str, const struct kernel_param *kp)
+{
+ return param_store_val(str, kp->arg, 0, INT_MAX);
+}
+
+static const struct kernel_param_ops set_param_ops = {
+ .set = set_params,
+ .get = param_get_int,
+};
+
/* Define the socket priority to use for connections were it is desirable
* that the NIC consider performing optimized packet processing or filtering.
* A non-zero value being sufficient to indicate general consideration of any
@@ -27,8 +52,8 @@
* values that may be unique for some NIC implementations.
*/
static int so_priority;
-module_param(so_priority, int, 0644);
-MODULE_PARM_DESC(so_priority, "nvmet tcp socket optimize priority");
+device_param_cb(so_priority, &set_param_ops, &so_priority, 0644);
+MODULE_PARM_DESC(so_priority, "nvmet tcp socket optimize priority: Default 0");
/* Define a time period (in usecs) that io_work() shall sample an activated
* queue before determining it to be idle. This optional module behavior
@@ -36,9 +61,10 @@ MODULE_PARM_DESC(so_priority, "nvmet tcp socket optimize priority");
* using advanced interrupt moderation techniques.
*/
static int idle_poll_period_usecs;
-module_param(idle_poll_period_usecs, int, 0644);
+device_param_cb(idle_poll_period_usecs, &set_param_ops,
+ &idle_poll_period_usecs, 0644);
MODULE_PARM_DESC(idle_poll_period_usecs,
- "nvmet tcp io_work poll till idle time period in usecs");
+ "nvmet tcp io_work poll till idle time period in usecs: Default 0");
#define NVMET_TCP_RECV_BUDGET 8
#define NVMET_TCP_SEND_BUDGET 8
diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
index 7e4292d88016..5b5c1e481722 100644
--- a/drivers/nvme/target/zns.c
+++ b/drivers/nvme/target/zns.c
@@ -70,7 +70,7 @@ bool nvmet_bdev_zns_enable(struct nvmet_ns *ns)
return true;
}
-void nvmet_execute_identify_cns_cs_ctrl(struct nvmet_req *req)
+void nvmet_execute_identify_ctrl_zns(struct nvmet_req *req)
{
u8 zasl = req->sq->ctrl->subsys->zasl;
struct nvmet_ctrl *ctrl = req->sq->ctrl;
@@ -95,9 +95,9 @@ out:
nvmet_req_complete(req, status);
}
-void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req)
+void nvmet_execute_identify_ns_zns(struct nvmet_req *req)
{
- struct nvme_id_ns_zns *id_zns;
+ struct nvme_id_ns_zns *id_zns = NULL;
u64 zsze;
u16 status;
u32 mar, mor;
@@ -118,16 +118,18 @@ void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req)
if (status)
goto done;
- if (!bdev_is_zoned(req->ns->bdev)) {
- req->error_loc = offsetof(struct nvme_identify, nsid);
- goto done;
- }
-
if (nvmet_ns_revalidate(req->ns)) {
mutex_lock(&req->ns->subsys->lock);
nvmet_ns_changed(req->ns->subsys, req->ns->nsid);
mutex_unlock(&req->ns->subsys->lock);
}
+
+ if (!bdev_is_zoned(req->ns->bdev)) {
+ status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+ req->error_loc = offsetof(struct nvme_identify, nsid);
+ goto out;
+ }
+
zsze = (bdev_zone_sectors(req->ns->bdev) << 9) >>
req->ns->blksize_shift;
id_zns->lbafe[0].zsze = cpu_to_le64(zsze);
@@ -148,8 +150,8 @@ void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req)
done:
status = nvmet_copy_to_sgl(req, 0, id_zns, sizeof(*id_zns));
- kfree(id_zns);
out:
+ kfree(id_zns);
nvmet_req_complete(req, status);
}
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index a9c2a8d76c45..9fbfce735d56 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -73,7 +73,8 @@ static void dasd_profile_init(struct dasd_profile *, struct dentry *);
static void dasd_profile_exit(struct dasd_profile *);
static void dasd_hosts_init(struct dentry *, struct dasd_device *);
static void dasd_hosts_exit(struct dasd_device *);
-
+static int dasd_handle_autoquiesce(struct dasd_device *, struct dasd_ccw_req *,
+ unsigned int);
/*
* SECTION: Operations on the device structure.
*/
@@ -1451,6 +1452,8 @@ int dasd_start_IO(struct dasd_ccw_req *cqr)
case -ENODEV:
DBF_DEV_EVENT(DBF_WARNING, device, "%s",
"start_IO: -ENODEV device gone, retry");
+ /* this is equivalent to CC=3 for SSCH report this to EER */
+ dasd_handle_autoquiesce(device, cqr, DASD_EER_STARTIO);
break;
case -EIO:
DBF_DEV_EVENT(DBF_WARNING, device, "%s",
@@ -1953,6 +1956,16 @@ static void __dasd_device_process_final_queue(struct dasd_device *device,
}
/*
+ * check if device should be autoquiesced due to too many timeouts
+ */
+static void __dasd_device_check_autoquiesce_timeout(struct dasd_device *device,
+ struct dasd_ccw_req *cqr)
+{
+ if ((device->default_retries - cqr->retries) >= device->aq_timeouts)
+ dasd_handle_autoquiesce(device, cqr, DASD_EER_TIMEOUTS);
+}
+
+/*
* Take a look at the first request on the ccw queue and check
* if it reached its expire time. If so, terminate the IO.
*/
@@ -1986,6 +1999,7 @@ static void __dasd_device_check_expire(struct dasd_device *device)
"remaining\n", cqr, (cqr->expires/HZ),
cqr->retries);
}
+ __dasd_device_check_autoquiesce_timeout(device, cqr);
}
}
@@ -2325,7 +2339,7 @@ static int _dasd_sleep_on(struct dasd_ccw_req *maincqr, int interruptible)
/* Non-temporary stop condition will trigger fail fast */
if (device->stopped & ~DASD_STOPPED_PENDING &&
test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) &&
- (!dasd_eer_enabled(device))) {
+ !dasd_eer_enabled(device) && device->aq_mask == 0) {
cqr->status = DASD_CQR_FAILED;
cqr->intrc = -ENOLINK;
continue;
@@ -2801,20 +2815,18 @@ restart:
dasd_log_sense(cqr, &cqr->irb);
}
- /* First of all call extended error reporting. */
- if (dasd_eer_enabled(base) &&
- cqr->status == DASD_CQR_FAILED) {
- dasd_eer_write(base, cqr, DASD_EER_FATALERROR);
-
- /* restart request */
+ /*
+ * First call extended error reporting and check for autoquiesce
+ */
+ spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags);
+ if (cqr->status == DASD_CQR_FAILED &&
+ dasd_handle_autoquiesce(base, cqr, DASD_EER_FATALERROR)) {
cqr->status = DASD_CQR_FILLED;
cqr->retries = 255;
- spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags);
- dasd_device_set_stop_bits(base, DASD_STOPPED_QUIESCE);
- spin_unlock_irqrestore(get_ccwdev_lock(base->cdev),
- flags);
+ spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags);
goto restart;
}
+ spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags);
/* Process finished ERP request. */
if (cqr->refers) {
@@ -2856,7 +2868,7 @@ static void __dasd_block_start_head(struct dasd_block *block)
/* Non-temporary stop condition will trigger fail fast */
if (block->base->stopped & ~DASD_STOPPED_PENDING &&
test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) &&
- (!dasd_eer_enabled(block->base))) {
+ !dasd_eer_enabled(block->base) && block->base->aq_mask == 0) {
cqr->status = DASD_CQR_FAILED;
cqr->intrc = -ENOLINK;
dasd_schedule_block_bh(block);
@@ -2941,7 +2953,7 @@ static int _dasd_requeue_request(struct dasd_ccw_req *cqr)
return 0;
spin_lock_irq(&cqr->dq->lock);
req = (struct request *) cqr->callback_data;
- blk_mq_requeue_request(req, false);
+ blk_mq_requeue_request(req, true);
spin_unlock_irq(&cqr->dq->lock);
return 0;
@@ -3670,8 +3682,8 @@ int dasd_generic_last_path_gone(struct dasd_device *device)
dev_warn(&device->cdev->dev, "No operational channel path is left "
"for the device\n");
DBF_DEV_EVENT(DBF_WARNING, device, "%s", "last path gone");
- /* First of all call extended error reporting. */
- dasd_eer_write(device, NULL, DASD_EER_NOPATH);
+ /* First call extended error reporting and check for autoquiesce. */
+ dasd_handle_autoquiesce(device, NULL, DASD_EER_NOPATH);
if (device->state < DASD_STATE_BASIC)
return 0;
@@ -3803,7 +3815,8 @@ void dasd_generic_path_event(struct ccw_device *cdev, int *path_event)
"No verified channel paths remain for the device\n");
DBF_DEV_EVENT(DBF_WARNING, device,
"%s", "last verified path gone");
- dasd_eer_write(device, NULL, DASD_EER_NOPATH);
+ /* First call extended error reporting and check for autoquiesce. */
+ dasd_handle_autoquiesce(device, NULL, DASD_EER_NOPATH);
dasd_device_set_stop_bits(device,
DASD_STOPPED_DC_WAIT);
}
@@ -3825,7 +3838,8 @@ EXPORT_SYMBOL_GPL(dasd_generic_verify_path);
void dasd_generic_space_exhaust(struct dasd_device *device,
struct dasd_ccw_req *cqr)
{
- dasd_eer_write(device, NULL, DASD_EER_NOSPC);
+ /* First call extended error reporting and check for autoquiesce. */
+ dasd_handle_autoquiesce(device, NULL, DASD_EER_NOSPC);
if (device->state < DASD_STATE_BASIC)
return;
@@ -3958,6 +3972,31 @@ void dasd_schedule_requeue(struct dasd_device *device)
}
EXPORT_SYMBOL(dasd_schedule_requeue);
+static int dasd_handle_autoquiesce(struct dasd_device *device,
+ struct dasd_ccw_req *cqr,
+ unsigned int reason)
+{
+ /* in any case write eer message with reason */
+ if (dasd_eer_enabled(device))
+ dasd_eer_write(device, cqr, reason);
+
+ if (!test_bit(reason, &device->aq_mask))
+ return 0;
+
+ /* notify eer about autoquiesce */
+ if (dasd_eer_enabled(device))
+ dasd_eer_write(device, NULL, DASD_EER_AUTOQUIESCE);
+
+ pr_info("%s: The DASD has been put in the quiesce state\n",
+ dev_name(&device->cdev->dev));
+ dasd_device_set_stop_bits(device, DASD_STOPPED_QUIESCE);
+
+ if (device->features & DASD_FEATURE_REQUEUEQUIESCE)
+ dasd_schedule_requeue(device);
+
+ return 1;
+}
+
static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device,
int rdc_buffer_size,
int magic)
diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index df17f0f9cb0f..620fab01b710 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -50,6 +50,7 @@ struct dasd_devmap {
unsigned short features;
struct dasd_device *device;
struct dasd_copy_relation *copy;
+ unsigned int aq_mask;
};
/*
@@ -1476,6 +1477,128 @@ dasd_eer_store(struct device *dev, struct device_attribute *attr,
static DEVICE_ATTR(eer_enabled, 0644, dasd_eer_show, dasd_eer_store);
/*
+ * aq_mask controls if the DASD should be quiesced on certain triggers
+ * The aq_mask attribute is interpreted as bitmap of the DASD_EER_* triggers.
+ */
+static ssize_t dasd_aq_mask_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct dasd_devmap *devmap;
+ unsigned int aq_mask = 0;
+
+ devmap = dasd_find_busid(dev_name(dev));
+ if (!IS_ERR(devmap))
+ aq_mask = devmap->aq_mask;
+
+ return sysfs_emit(buf, "%d\n", aq_mask);
+}
+
+static ssize_t dasd_aq_mask_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct dasd_devmap *devmap;
+ unsigned int val;
+
+ if (kstrtouint(buf, 0, &val) || val > DASD_EER_VALID)
+ return -EINVAL;
+
+ devmap = dasd_devmap_from_cdev(to_ccwdev(dev));
+ if (IS_ERR(devmap))
+ return PTR_ERR(devmap);
+
+ spin_lock(&dasd_devmap_lock);
+ devmap->aq_mask = val;
+ if (devmap->device)
+ devmap->device->aq_mask = devmap->aq_mask;
+ spin_unlock(&dasd_devmap_lock);
+
+ return count;
+}
+
+static DEVICE_ATTR(aq_mask, 0644, dasd_aq_mask_show, dasd_aq_mask_store);
+
+/*
+ * aq_requeue controls if requests are returned to the blocklayer on quiesce
+ * or if requests are only not started
+ */
+static ssize_t dasd_aqr_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct dasd_devmap *devmap;
+ int flag;
+
+ devmap = dasd_find_busid(dev_name(dev));
+ if (!IS_ERR(devmap))
+ flag = (devmap->features & DASD_FEATURE_REQUEUEQUIESCE) != 0;
+ else
+ flag = (DASD_FEATURE_DEFAULT &
+ DASD_FEATURE_REQUEUEQUIESCE) != 0;
+ return sysfs_emit(buf, "%d\n", flag);
+}
+
+static ssize_t dasd_aqr_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ bool val;
+ int rc;
+
+ if (kstrtobool(buf, &val))
+ return -EINVAL;
+
+ rc = dasd_set_feature(to_ccwdev(dev), DASD_FEATURE_REQUEUEQUIESCE, val);
+
+ return rc ? : count;
+}
+
+static DEVICE_ATTR(aq_requeue, 0644, dasd_aqr_show, dasd_aqr_store);
+
+/*
+ * aq_timeouts controls how much retries have to time out until
+ * a device gets autoquiesced
+ */
+static ssize_t
+dasd_aq_timeouts_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct dasd_device *device;
+ int len;
+
+ device = dasd_device_from_cdev(to_ccwdev(dev));
+ if (IS_ERR(device))
+ return -ENODEV;
+ len = sysfs_emit(buf, "%u\n", device->aq_timeouts);
+ dasd_put_device(device);
+ return len;
+}
+
+static ssize_t
+dasd_aq_timeouts_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct dasd_device *device;
+ unsigned int val;
+
+ device = dasd_device_from_cdev(to_ccwdev(dev));
+ if (IS_ERR(device))
+ return -ENODEV;
+
+ if ((kstrtouint(buf, 10, &val) != 0) ||
+ val > DASD_RETRIES_MAX || val == 0) {
+ dasd_put_device(device);
+ return -EINVAL;
+ }
+
+ if (val)
+ device->aq_timeouts = val;
+
+ dasd_put_device(device);
+ return count;
+}
+
+static DEVICE_ATTR(aq_timeouts, 0644, dasd_aq_timeouts_show,
+ dasd_aq_timeouts_store);
+
+/*
* expiration time for default requests
*/
static ssize_t
@@ -2324,6 +2447,9 @@ static struct attribute * dasd_attrs[] = {
&dev_attr_copy_pair.attr,
&dev_attr_copy_role.attr,
&dev_attr_ping.attr,
+ &dev_attr_aq_mask.attr,
+ &dev_attr_aq_requeue.attr,
+ &dev_attr_aq_timeouts.attr,
NULL,
};
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 1a69f97e88fb..ade1369fe5ed 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -2109,6 +2109,7 @@ dasd_eckd_check_characteristics(struct dasd_device *device)
device->default_retries = DASD_RETRIES;
device->path_thrhld = DASD_ECKD_PATH_THRHLD;
device->path_interval = DASD_ECKD_PATH_INTERVAL;
+ device->aq_timeouts = DASD_RETRIES_MAX;
if (private->conf.gneq) {
value = 1;
diff --git a/drivers/s390/block/dasd_eer.c b/drivers/s390/block/dasd_eer.c
index a4cc772208a6..c956de711cf7 100644
--- a/drivers/s390/block/dasd_eer.c
+++ b/drivers/s390/block/dasd_eer.c
@@ -387,6 +387,7 @@ void dasd_eer_write(struct dasd_device *device, struct dasd_ccw_req *cqr,
break;
case DASD_EER_NOPATH:
case DASD_EER_NOSPC:
+ case DASD_EER_AUTOQUIESCE:
dasd_eer_write_standard_trigger(device, NULL, id);
break;
case DASD_EER_STATECHANGE:
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 97adc8a7ae6b..33f812f0e515 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -444,22 +444,22 @@ struct dasd_discipline {
extern struct dasd_discipline *dasd_diag_discipline_pointer;
-/*
- * Notification numbers for extended error reporting notifications:
- * The DASD_EER_DISABLE notification is sent before a dasd_device (and it's
- * eer pointer) is freed. The error reporting module needs to do all necessary
- * cleanup steps.
- * The DASD_EER_TRIGGER notification sends the actual error reports (triggers).
- */
-#define DASD_EER_DISABLE 0
-#define DASD_EER_TRIGGER 1
+/* Trigger IDs for extended error reporting DASD EER and autoquiesce */
+enum eer_trigger {
+ DASD_EER_FATALERROR = 1,
+ DASD_EER_NOPATH,
+ DASD_EER_STATECHANGE,
+ DASD_EER_PPRCSUSPEND,
+ DASD_EER_NOSPC,
+ DASD_EER_TIMEOUTS,
+ DASD_EER_STARTIO,
+
+ /* enum end marker, only add new trigger above */
+ DASD_EER_MAX,
+ DASD_EER_AUTOQUIESCE = 31, /* internal only */
+};
-/* Trigger IDs for extended error reporting DASD_EER_TRIGGER notification */
-#define DASD_EER_FATALERROR 1
-#define DASD_EER_NOPATH 2
-#define DASD_EER_STATECHANGE 3
-#define DASD_EER_PPRCSUSPEND 4
-#define DASD_EER_NOSPC 5
+#define DASD_EER_VALID ((1U << DASD_EER_MAX) - 1)
/* DASD path handling */
@@ -637,6 +637,8 @@ struct dasd_device {
struct dasd_format_entry format_entry;
struct kset *paths_info;
struct dasd_copy_relation *copy;
+ unsigned long aq_mask;
+ unsigned int aq_timeouts;
};
struct dasd_block {