summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/ceph/messenger.c29
-rw-r--r--fs/ceph/messenger.h2
-rw-r--r--fs/ceph/osd_client.c42
-rw-r--r--fs/ceph/osd_client.h4
4 files changed, 67 insertions, 10 deletions
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index c03b4185c143..506b638a023b 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -1976,6 +1976,35 @@ void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg)
}
/*
+ * Revoke a page vector that we may be reading data into
+ */
+void ceph_con_revoke_pages(struct ceph_connection *con, struct page **pages)
+{
+ mutex_lock(&con->mutex);
+ if (con->in_msg && con->in_msg->pages == pages) {
+ unsigned data_len = le32_to_cpu(con->in_hdr.data_len);
+
+ /* skip rest of message */
+ dout("con_revoke_pages %p msg %p pages %p revoked\n", con,
+ con->in_msg, pages);
+ if (con->in_msg_pos.data_pos < data_len)
+ con->in_base_pos = con->in_msg_pos.data_pos - data_len;
+ else
+ con->in_base_pos = con->in_base_pos -
+ sizeof(struct ceph_msg_header) -
+ sizeof(struct ceph_msg_footer);
+ con->in_msg->pages = NULL;
+ ceph_msg_put(con->in_msg);
+ con->in_msg = NULL;
+ con->in_tag = CEPH_MSGR_TAG_READY;
+ } else {
+ dout("con_revoke_pages %p msg %p pages %p no-op\n",
+ con, con->in_msg, pages);
+ }
+ mutex_unlock(&con->mutex);
+}
+
+/*
* Queue a keepalive byte to ensure the tcp connection is alive.
*/
void ceph_con_keepalive(struct ceph_connection *con)
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h
index 94b55de90331..7e2aab1d3ce2 100644
--- a/fs/ceph/messenger.h
+++ b/fs/ceph/messenger.h
@@ -230,6 +230,8 @@ extern void ceph_con_open(struct ceph_connection *con,
extern void ceph_con_close(struct ceph_connection *con);
extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg);
extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg);
+extern void ceph_con_revoke_pages(struct ceph_connection *con,
+ struct page **pages);
extern void ceph_con_keepalive(struct ceph_connection *con);
extern struct ceph_connection *ceph_con_get(struct ceph_connection *con);
extern void ceph_con_put(struct ceph_connection *con);
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index a1800fb63237..374f0013956c 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -87,6 +87,13 @@ void ceph_osdc_release_request(struct kref *kref)
ceph_msg_put(req->r_request);
if (req->r_reply)
ceph_msg_put(req->r_reply);
+ if (req->r_con_filling_pages) {
+ dout("release_request revoking pages %p from con %p\n",
+ req->r_pages, req->r_con_filling_pages);
+ ceph_con_revoke_pages(req->r_con_filling_pages,
+ req->r_pages);
+ ceph_con_put(req->r_con_filling_pages);
+ }
if (req->r_own_pages)
ceph_release_page_vector(req->r_pages,
req->r_num_pages);
@@ -687,7 +694,8 @@ static void handle_timeout(struct work_struct *work)
* handle osd op reply. either call the callback if it is specified,
* or do the completion to wake up the waiting thread.
*/
-static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
+static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
+ struct ceph_connection *con)
{
struct ceph_osd_reply_head *rhead = msg->front.iov_base;
struct ceph_osd_request *req;
@@ -715,6 +723,16 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
ceph_osdc_get_request(req);
flags = le32_to_cpu(rhead->flags);
+ /*
+ * if this connection filled our pages, drop our reference now, to
+ * avoid a (safe but slower) revoke later.
+ */
+ if (req->r_con_filling_pages == con && req->r_pages == msg->pages) {
+ dout(" got pages, dropping con_filling_pages ref %p\n", con);
+ req->r_con_filling_pages = NULL;
+ ceph_con_put(con);
+ }
+
if (req->r_reply) {
/*
* once we see the message has been received, we don't
@@ -1007,14 +1025,20 @@ static int prepare_pages(struct ceph_connection *con, struct ceph_msg *m,
}
dout("prepare_pages tid %llu has %d pages, want %d\n",
tid, req->r_num_pages, want);
- if (likely(req->r_num_pages >= want && !req->r_prepared_pages)) {
- m->pages = req->r_pages;
- m->nr_pages = req->r_num_pages;
- req->r_reply = m; /* only for duration of read over socket */
- ceph_msg_get(m);
- req->r_prepared_pages = 1;
- ret = 0; /* success */
+ if (unlikely(req->r_num_pages < want))
+ goto out;
+
+ if (req->r_con_filling_pages) {
+ dout("revoking pages %p from old con %p\n", req->r_pages,
+ req->r_con_filling_pages);
+ ceph_con_revoke_pages(req->r_con_filling_pages, req->r_pages);
+ ceph_con_put(req->r_con_filling_pages);
}
+ req->r_con_filling_pages = ceph_con_get(con);
+ req->r_reply = ceph_msg_get(m); /* for duration of read over socket */
+ m->pages = req->r_pages;
+ m->nr_pages = req->r_num_pages;
+ ret = 0; /* success */
out:
mutex_unlock(&osdc->request_mutex);
return ret;
@@ -1269,7 +1293,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
ceph_osdc_handle_map(osdc, msg);
break;
case CEPH_MSG_OSD_OPREPLY:
- handle_reply(osdc, msg);
+ handle_reply(osdc, msg, con);
break;
default:
diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h
index 2e4cfd1e9f10..8fef71cc4457 100644
--- a/fs/ceph/osd_client.h
+++ b/fs/ceph/osd_client.h
@@ -43,11 +43,13 @@ struct ceph_osd_request {
struct list_head r_osd_item;
struct ceph_osd *r_osd;
+ struct ceph_connection *r_con_filling_pages;
+
struct ceph_msg *r_request, *r_reply;
int r_result;
int r_flags; /* any additional flags for the osd */
u32 r_sent; /* >0 if r_request is sending/sent */
- int r_prepared_pages, r_got_reply;
+ int r_got_reply;
int r_num_prealloc_reply;
struct ceph_osd_client *r_osdc;