Merge tag 'for-6.11/block-20240722' of git://git.kernel.dk/linux

Pull more block updates from Jens Axboe: - MD fixes via Song: - md-cluster fixes (Heming Zhao) - raid1 fix (Mateusz Jończyk) - s390/dasd module description (Jeff) - Series cleaning up and hardening the blk-mq debugfs flag handling (John, Christoph) - blk-cgroup cleanup (Xiu) - Error polled IO attempts if backend doesn't support it (hexue) - Fix for an sbitmap hang (Yang) * tag 'for-6.11/block-20240722' of git://git.kernel.dk/linux: (23 commits) blk-cgroup: move congestion_count to struct blkcg sbitmap: fix io hung due to race on sbitmap_word::cleared block: avoid polling configuration errors block: Catch possible entries missing from rqf_name[] block: Simplify definition of RQF_NAME() block: Use enum to define RQF_x bit indexes block: Catch possible entries missing from cmd_flag_name[] block: Catch possible entries missing from alloc_policy_name[] block: Catch possible entries missing from hctx_flag_name[] block: Catch possible entries missing from hctx_state_name[] block: Catch possible entries missing from blk_queue_flag_name[] block: Make QUEUE_FLAG_x as an enum block: Relocate BLK_MQ_MAX_DEPTH block: Relocate BLK_MQ_CPU_WORK_BATCH block: remove QUEUE_FLAG_STOPPED block: Add missing entry to hctx_flag_name[] block: Add zone write plugging entry to rqf_name[] block: Add missing entries from cmd_flag_name[] s390/dasd: fix error checks in dasd_copy_pair_store() s390/dasd: add missing MODULE_DESCRIPTION() macros ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2024-07-22 11:32:05 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2024-07-22 11:32:05 -0700
commit: 7d080fa867092c1db078dd72d70cb256642f7b18 (patch)
tree: 6185f09f14510e8d733d05c68c0aa6fcde262cf0 /drivers/md
parent: 0256994887d7c89c2a41d872aac67605bda8f115 (diff)
parent: 89ed6c9ac69ec398ccb648f5f675b43e8ca679ca (diff)
download: lwn-7d080fa867092c1db078dd72d70cb256642f7b18.tar.gz
lwn-7d080fa867092c1db078dd72d70cb256642f7b18.zip
4 files changed, 56 insertions, 13 deletions
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index c1ea214bfc91..1d0db62f0351 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -15,6 +15,7 @@
 
 #define LVB_SIZE	64
 #define NEW_DEV_TIMEOUT 5000
+#define WAIT_DLM_LOCK_TIMEOUT (30 * HZ)
 
 struct dlm_lock_resource {
 	dlm_lockspace_t *ls;
@@ -56,6 +57,7 @@ struct resync_info {
 #define		MD_CLUSTER_ALREADY_IN_CLUSTER		6
 #define		MD_CLUSTER_PENDING_RECV_EVENT		7
 #define 	MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD		8
+#define		MD_CLUSTER_WAITING_FOR_SYNC		9
 
 struct md_cluster_info {
 	struct mddev *mddev; /* the md device which md_cluster_info belongs to */
@@ -91,6 +93,7 @@ struct md_cluster_info {
 	sector_t sync_hi;
 };
 
+/* For compatibility, add the new msg_type at the end. */
 enum msg_type {
 	METADATA_UPDATED = 0,
 	RESYNCING,
@@ -100,6 +103,7 @@ enum msg_type {
 	BITMAP_NEEDS_SYNC,
 	CHANGE_CAPACITY,
 	BITMAP_RESIZE,
+	RESYNCING_START,
 };
 
 struct cluster_msg {
@@ -130,8 +134,13 @@ static int dlm_lock_sync(struct dlm_lock_resource *res, int mode)
 			0, sync_ast, res, res->bast);
 	if (ret)
 		return ret;
-	wait_event(res->sync_locking, res->sync_locking_done);
+	ret = wait_event_timeout(res->sync_locking, res->sync_locking_done,
+				WAIT_DLM_LOCK_TIMEOUT);
 	res->sync_locking_done = false;
+	if (!ret) {
+		pr_err("locking DLM '%s' timeout!\n", res->name);
+		return -EBUSY;
+	}
 	if (res->lksb.sb_status == 0)
 		res->mode = mode;
 	return res->lksb.sb_status;
@@ -455,6 +464,7 @@ static void process_suspend_info(struct mddev *mddev,
 		clear_bit(MD_RESYNCING_REMOTE, &mddev->recovery);
 		remove_suspend_info(mddev, slot);
 		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+		clear_bit(MD_CLUSTER_WAITING_FOR_SYNC, &cinfo->state);
 		md_wakeup_thread(mddev->thread);
 		return;
 	}
@@ -525,6 +535,7 @@ static int process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)
 		res = -1;
 	}
 	clear_bit(MD_CLUSTER_WAITING_FOR_NEWDISK, &cinfo->state);
+	set_bit(MD_CLUSTER_WAITING_FOR_SYNC, &cinfo->state);
 	return res;
 }
 
@@ -593,6 +604,9 @@ static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
 	case CHANGE_CAPACITY:
 		set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
 		break;
+	case RESYNCING_START:
+		clear_bit(MD_CLUSTER_WAITING_FOR_SYNC, &mddev->cluster_info->state);
+		break;
 	case RESYNCING:
 		set_bit(MD_RESYNCING_REMOTE, &mddev->recovery);
 		process_suspend_info(mddev, le32_to_cpu(msg->slot),
@@ -743,7 +757,7 @@ static void unlock_comm(struct md_cluster_info *cinfo)
  */
 static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
 {
-	int error;
+	int error, unlock_error;
 	int slot = cinfo->slot_number - 1;
 
 	cmsg->slot = cpu_to_le32(slot);
@@ -751,7 +765,7 @@ static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
 	error = dlm_lock_sync(cinfo->message_lockres, DLM_LOCK_EX);
 	if (error) {
 		pr_err("md-cluster: failed to get EX on MESSAGE (%d)\n", error);
-		goto failed_message;
+		return error;
 	}
 
 	memcpy(cinfo->message_lockres->lksb.sb_lvbptr, (void *)cmsg,
@@ -781,14 +795,10 @@ static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
 	}
 
 failed_ack:
-	error = dlm_unlock_sync(cinfo->message_lockres);
-	if (unlikely(error != 0)) {
+	while ((unlock_error = dlm_unlock_sync(cinfo->message_lockres)))
 		pr_err("md-cluster: failed convert to NL on MESSAGE(%d)\n",
-			error);
-		/* in case the message can't be released due to some reason */
-		goto failed_ack;
-	}
-failed_message:
+			unlock_error);
+
 	return error;
 }
 
@@ -1343,6 +1353,23 @@ static void resync_info_get(struct mddev *mddev, sector_t *lo, sector_t *hi)
 	spin_unlock_irq(&cinfo->suspend_lock);
 }
 
+static int resync_status_get(struct mddev *mddev)
+{
+	struct md_cluster_info *cinfo = mddev->cluster_info;
+
+	return test_bit(MD_CLUSTER_WAITING_FOR_SYNC, &cinfo->state);
+}
+
+static int resync_start_notify(struct mddev *mddev)
+{
+	struct md_cluster_info *cinfo = mddev->cluster_info;
+	struct cluster_msg cmsg = {0};
+
+	cmsg.type = cpu_to_le32(RESYNCING_START);
+
+	return sendmsg(cinfo, &cmsg, 0);
+}
+
 static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
 {
 	struct md_cluster_info *cinfo = mddev->cluster_info;
@@ -1577,6 +1604,8 @@ static const struct md_cluster_operations cluster_ops = {
 	.resync_start = resync_start,
 	.resync_finish = resync_finish,
 	.resync_info_update = resync_info_update,
+	.resync_start_notify = resync_start_notify,
+	.resync_status_get = resync_status_get,
 	.resync_info_get = resync_info_get,
 	.metadata_update_start = metadata_update_start,
 	.metadata_update_finish = metadata_update_finish,
diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h
index a78e3021775d..470bf18ffde5 100644
--- a/drivers/md/md-cluster.h
+++ b/drivers/md/md-cluster.h
@@ -14,6 +14,8 @@ struct md_cluster_operations {
 	int (*leave)(struct mddev *mddev);
 	int (*slot_number)(struct mddev *mddev);
 	int (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi);
+	int (*resync_start_notify)(struct mddev *mddev);
+	int (*resync_status_get)(struct mddev *mddev);
 	void (*resync_info_get)(struct mddev *mddev, sector_t *lo, sector_t *hi);
 	int (*metadata_update_start)(struct mddev *mddev);
 	int (*metadata_update_finish)(struct mddev *mddev);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 64693913ed18..d3a837506a36 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -8978,7 +8978,8 @@ void md_do_sync(struct md_thread *thread)
 	 * This will mean we have to start checking from the beginning again.
 	 *
 	 */
-
+	if (mddev_is_clustered(mddev))
+		md_cluster_ops->resync_start_notify(mddev);
 	do {
 		int mddev2_minor = -1;
 		mddev->curr_resync = MD_RESYNC_DELAYED;
@@ -9992,8 +9993,18 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
 			 */
 			if (rdev2->raid_disk == -1 && role != MD_DISK_ROLE_SPARE &&
 			    !(le32_to_cpu(sb->feature_map) &
-			      MD_FEATURE_RESHAPE_ACTIVE)) {
-				rdev2->saved_raid_disk = role;
+			      MD_FEATURE_RESHAPE_ACTIVE) &&
+			    !md_cluster_ops->resync_status_get(mddev)) {
+				/*
+				 * -1 to make raid1_add_disk() set conf->fullsync
+				 * to 1. This could avoid skipping sync when the
+				 * remote node is down during resyncing.
+				 */
+				if ((le32_to_cpu(sb->feature_map)
+				    & MD_FEATURE_RECOVERY_OFFSET))
+					rdev2->saved_raid_disk = -1;
+				else
+					rdev2->saved_raid_disk = role;
 				ret = remove_and_add_spares(mddev, rdev2);
 				pr_info("Activated spare: %pg\n",
 					rdev2->bdev);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 04a0c2ca1732..7acfe7c9dc8d 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -680,6 +680,7 @@ static int choose_slow_rdev(struct r1conf *conf, struct r1bio *r1_bio,
 		len = r1_bio->sectors;
 		read_len = raid1_check_read_range(rdev, this_sector, &len);
 		if (read_len == r1_bio->sectors) {
+			*max_sectors = read_len;
 			update_read_sectors(conf, disk, this_sector, read_len);
 			return disk;
 		}
author	Linus Torvalds <torvalds@linux-foundation.org>	2024-07-22 11:32:05 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2024-07-22 11:32:05 -0700
commit	7d080fa867092c1db078dd72d70cb256642f7b18 (patch)
tree	6185f09f14510e8d733d05c68c0aa6fcde262cf0 /drivers/md
parent	0256994887d7c89c2a41d872aac67605bda8f115 (diff)
parent	89ed6c9ac69ec398ccb648f5f675b43e8ca679ca (diff)
download	lwn-7d080fa867092c1db078dd72d70cb256642f7b18.tar.gz lwn-7d080fa867092c1db078dd72d70cb256642f7b18.zip