summaryrefslogtreecommitdiff
path: root/fs/ceph/mds_client.c
diff options
context:
space:
mode:
authorXiubo Li <xiubli@redhat.com>2022-05-10 09:47:01 +0800
committerIlya Dryomov <idryomov@gmail.com>2022-08-03 00:54:12 +0200
commit4868e537fa867f82e38e37429d61d7bb8357d79b (patch)
treebadea4f6181c6755959c8a44c4b84f527da23cad /fs/ceph/mds_client.c
parent4f48d5da81ee7004a789c8aac2d0dfb2514c37f1 (diff)
downloadlwn-4868e537fa867f82e38e37429d61d7bb8357d79b.tar.gz
lwn-4868e537fa867f82e38e37429d61d7bb8357d79b.zip
ceph: wait for the first reply of inflight async unlink
In the async unlink case the kclient won't wait for the first reply from the MDS; it just drops all the links, unhashes the dentry and then succeeds immediately. For any new create/link/rename, etc. requests that follow and use the same file names we must wait for the first reply of the inflight unlink request, or the MDS may fail these following requests with -EEXIST if the inflight async unlink request was delayed for some reason. And the worst case is that a non-async openc request will successfully open the file if the CDentry hasn't been unlinked yet, but later the previously delayed async unlink request will remove the CDentry. That means the just created file may be deleted later by accident. We need to wait for the inflight async unlink requests to finish when creating new files/directories by using the same file names. Link: https://tracker.ceph.com/issues/55332 Signed-off-by: Xiubo Li <xiubli@redhat.com> Reviewed-by: Jeff Layton <jlayton@kernel.org> Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Diffstat (limited to 'fs/ceph/mds_client.c')
-rw-r--r--fs/ceph/mds_client.c75
1 files changed, 74 insertions, 1 deletions
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 0aded10375fd..f6da80d110dc 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -456,7 +456,7 @@ static int ceph_parse_deleg_inos(void **p, void *end,
dout("added delegated inode 0x%llx\n",
start - 1);
} else if (err == -EBUSY) {
- pr_warn("ceph: MDS delegated inode 0x%llx more than once.\n",
+ pr_warn("MDS delegated inode 0x%llx more than once.\n",
start - 1);
} else {
return err;
@@ -655,6 +655,79 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
}
+/*
+ * In the async unlink case the kclient won't wait for the first reply
+ * from the MDS; it just drops all the links, unhashes the dentry and
+ * then succeeds immediately.
+ *
+ * For any new create/link/rename, etc. requests that follow and use the
+ * same file names we must wait for the first reply of the inflight
+ * unlink request, or the MDS may fail these following requests with
+ * -EEXIST if the inflight async unlink request was delayed for some
+ * reason.
+ *
+ * And the worst case is that a non-async openc request will
+ * successfully open the file if the CDentry hasn't been unlinked yet,
+ * but later the previously delayed async unlink request will remove the
+ * CDentry. That means the just created file may be deleted later by
+ * accident.
+ *
+ * We need to wait for the inflight async unlink requests to finish
+ * when creating new files/directories by using the same file names.
+ *
+ * The lookup walks the fsc->async_unlink_conflict bucket for @dentry's
+ * name hash and picks the first entry that is still hashed and has the
+ * same parent and the same name.  If one is found, the caller sleeps
+ * (TASK_KILLABLE) until CEPH_DENTRY_ASYNC_UNLINK_BIT is cleared in that
+ * entry's flags.
+ *
+ * Returns 0 when no conflicting entry exists; otherwise returns the
+ * result of wait_on_bit().
+ */
+int ceph_wait_on_conflict_unlink(struct dentry *dentry)
+{
+	struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
+	struct dentry *pdentry = dentry->d_parent;
+	struct dentry *udentry, *found = NULL;
+	struct ceph_dentry_info *di;
+	struct qstr dname;
+	u32 hash = dentry->d_name.hash;
+	int err;
+
+	dname.name = dentry->d_name.name;
+	dname.len = dentry->d_name.len;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu(fsc->async_unlink_conflict, di,
+				   hnode, hash) {
+		udentry = di->dentry;
+
+		spin_lock(&udentry->d_lock);
+		/* Cheap rejections first: hash, parent, still in the table. */
+		if (udentry->d_name.hash != hash)
+			goto next;
+		if (unlikely(udentry->d_parent != pdentry))
+			goto next;
+		if (!hash_hashed(&di->hnode))
+			goto next;
+
+		if (!test_bit(CEPH_DENTRY_ASYNC_UNLINK_BIT, &di->flags))
+			pr_warn("%s dentry %p:%pd async unlink bit is not set\n",
+				__func__, dentry, dentry);
+
+		if (!d_same_name(udentry, pdentry, &dname))
+			goto next;
+
+		/*
+		 * Grab the reference while d_lock is still held.  Calling
+		 * dget() only after dropping the lock is too late: nothing
+		 * here pins udentry in between, so it could already be on
+		 * its way to being freed.
+		 */
+		found = dget_dlock(udentry);
+		spin_unlock(&udentry->d_lock);
+		break;
+next:
+		spin_unlock(&udentry->d_lock);
+	}
+	rcu_read_unlock();
+
+	if (likely(!found))
+		return 0;
+
+	dout("%s dentry %p:%pd conflict with old %p:%pd\n", __func__,
+	     dentry, dentry, found, found);
+
+	/*
+	 * The loop was left via break, so di still refers to the entry
+	 * belonging to 'found'.
+	 */
+	err = wait_on_bit(&di->flags, CEPH_DENTRY_ASYNC_UNLINK_BIT,
+			  TASK_KILLABLE);
+	dput(found);
+	return err;
+}
+
/*
* sessions