summaryrefslogtreecommitdiff
path: root/fs/ceph/mds_client.c
diff options
context:
space:
mode:
authorYan, Zheng <zyan@redhat.com>2019-05-18 20:39:55 +0800
committerIlya Dryomov <idryomov@gmail.com>2019-06-05 20:34:39 +0200
commit3e1d0452edceebb903d23db53201013c940bf000 (patch)
tree1a6065a9b2fcd52395ae35778cf9de0ad43d0b30 /fs/ceph/mds_client.c
parent1cf89a8dee5e6e9d4fcb81b571a54d40068dfbb7 (diff)
downloadlwn-3e1d0452edceebb903d23db53201013c940bf000.tar.gz
lwn-3e1d0452edceebb903d23db53201013c940bf000.zip
ceph: avoid iput_final() while holding mutex or in dispatch thread
iput_final() may wait for reahahead pages. The wait can cause deadlock. For example: Workqueue: ceph-msgr ceph_con_workfn [libceph] Call Trace: schedule+0x36/0x80 io_schedule+0x16/0x40 __lock_page+0x101/0x140 truncate_inode_pages_range+0x556/0x9f0 truncate_inode_pages_final+0x4d/0x60 evict+0x182/0x1a0 iput+0x1d2/0x220 iterate_session_caps+0x82/0x230 [ceph] dispatch+0x678/0xa80 [ceph] ceph_con_workfn+0x95b/0x1560 [libceph] process_one_work+0x14d/0x410 worker_thread+0x4b/0x460 kthread+0x105/0x140 ret_from_fork+0x22/0x40 Workqueue: ceph-msgr ceph_con_workfn [libceph] Call Trace: __schedule+0x3d6/0x8b0 schedule+0x36/0x80 schedule_preempt_disabled+0xe/0x10 mutex_lock+0x2f/0x40 ceph_check_caps+0x505/0xa80 [ceph] ceph_put_wrbuffer_cap_refs+0x1e5/0x2c0 [ceph] writepages_finish+0x2d3/0x410 [ceph] __complete_request+0x26/0x60 [libceph] handle_reply+0x6c8/0xa10 [libceph] dispatch+0x29a/0xbb0 [libceph] ceph_con_workfn+0x95b/0x1560 [libceph] process_one_work+0x14d/0x410 worker_thread+0x4b/0x460 kthread+0x105/0x140 ret_from_fork+0x22/0x40 In above example, truncate_inode_pages_range() waits for readahead pages while holding s_mutex. ceph_check_caps() waits for s_mutex and blocks OSD dispatch thread. Later OSD replies (for readahead) can't be handled. ceph_check_caps() also may lock snap_rwsem for read. So similar deadlock can happen if iput_final() is called while holding snap_rwsem. In general, it's not good to call iput_final() inside MDS/OSD dispatch threads or while holding any mutex. The fix is introducing ceph_async_iput(), which calls iput_final() in workqueue. Signed-off-by: "Yan, Zheng" <zyan@redhat.com> Reviewed-by: Jeff Layton <jlayton@redhat.com> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Diffstat (limited to 'fs/ceph/mds_client.c')
-rw-r--r--fs/ceph/mds_client.c28
1 files changed, 18 insertions, 10 deletions
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 959b1bf7c327..6af2d0d4a87a 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -690,11 +690,12 @@ void ceph_mdsc_release_request(struct kref *kref)
ceph_msg_put(req->r_reply);
if (req->r_inode) {
ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
- iput(req->r_inode);
+ /* avoid calling iput_final() in mds dispatch threads */
+ ceph_async_iput(req->r_inode);
}
if (req->r_parent)
ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
- iput(req->r_target_inode);
+ ceph_async_iput(req->r_target_inode);
if (req->r_dentry)
dput(req->r_dentry);
if (req->r_old_dentry)
@@ -708,7 +709,7 @@ void ceph_mdsc_release_request(struct kref *kref)
*/
ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
CEPH_CAP_PIN);
- iput(req->r_old_dentry_dir);
+ ceph_async_iput(req->r_old_dentry_dir);
}
kfree(req->r_path1);
kfree(req->r_path2);
@@ -818,7 +819,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
}
if (req->r_unsafe_dir) {
- iput(req->r_unsafe_dir);
+ /* avoid calling iput_final() in mds dispatch threads */
+ ceph_async_iput(req->r_unsafe_dir);
req->r_unsafe_dir = NULL;
}
@@ -983,7 +985,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node);
if (!cap) {
spin_unlock(&ci->i_ceph_lock);
- iput(inode);
+ ceph_async_iput(inode);
goto random;
}
mds = cap->session->s_mds;
@@ -992,7 +994,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
cap == ci->i_auth_cap ? "auth " : "", cap);
spin_unlock(&ci->i_ceph_lock);
out:
- iput(inode);
+ /* avoid calling iput_final() while holding mdsc->mutex or
+ * in mds dispatch threads */
+ ceph_async_iput(inode);
return mds;
random:
@@ -1302,7 +1306,9 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
spin_unlock(&session->s_cap_lock);
if (last_inode) {
- iput(last_inode);
+ /* avoid calling iput_final() while holding
+ * s_mutex or in mds dispatch threads */
+ ceph_async_iput(last_inode);
last_inode = NULL;
}
if (old_cap) {
@@ -1335,7 +1341,7 @@ out:
session->s_cap_iterator = NULL;
spin_unlock(&session->s_cap_lock);
- iput(last_inode);
+ ceph_async_iput(last_inode);
if (old_cap)
ceph_put_cap(session->s_mdsc, old_cap);
@@ -1471,7 +1477,8 @@ static void remove_session_caps(struct ceph_mds_session *session)
spin_unlock(&session->s_cap_lock);
inode = ceph_find_inode(sb, vino);
- iput(inode);
+ /* avoid calling iput_final() while holding s_mutex */
+ ceph_async_iput(inode);
spin_lock(&session->s_cap_lock);
}
@@ -3912,8 +3919,9 @@ release:
ceph_con_send(&session->s_con, msg);
out:
- iput(inode);
mutex_unlock(&session->s_mutex);
+ /* avoid calling iput_final() in mds dispatch threads */
+ ceph_async_iput(inode);
return;
bad: