From a41bad1a9b9f9982eb9b451165724c5f81096683 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng"
Date: Fri, 30 Nov 2012 13:49:51 +0800
Subject: ceph: re-calculate truncate_size for strip object

Otherwise the OSD may truncate the object to a larger size.

Signed-off-by: Yan, Zheng
Reviewed-by: Sage Weil
---
 net/ceph/osd_client.c | 8 ++++++++
 1 file changed, 8 insertions(+)
(limited to 'net/ceph')

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index eb9a44478764..267f183b801a 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -76,8 +76,16 @@ int ceph_calc_raw_layout(struct ceph_osd_client *osdc,
 		     orig_len - *plen, off, *plen);

 	if (op_has_extent(op->op)) {
+		u32 osize = le32_to_cpu(layout->fl_object_size);
 		op->extent.offset = objoff;
 		op->extent.length = objlen;
+		if (op->extent.truncate_size <= off - objoff) {
+			op->extent.truncate_size = 0;
+		} else {
+			op->extent.truncate_size -= off - objoff;
+			if (op->extent.truncate_size > osize)
+				op->extent.truncate_size = osize;
+		}
 	}
 	req->r_num_pages = calc_pages_for(off, *plen);
 	req->r_page_alignment = off & ~PAGE_MASK;
-- cgit v1.2.3

From 1604f488ac2dcce33c8218e75a000e8c5fb57e61 Mon Sep 17 00:00:00 2001
From: Jim Schutt
Date: Fri, 30 Nov 2012 09:15:25 -0700
Subject: libceph: for chooseleaf rules, retry CRUSH map descent from root if
 leaf is failed

Add libceph support for a new CRUSH tunable recently added to Ceph
servers.

Consider the CRUSH rule
  step chooseleaf firstn 0 type <node_type>

This rule means that replicas will be chosen in a manner such that each
chosen leaf's branch will contain a unique instance of <node_type>.

When an object is re-replicated after a leaf failure, if the CRUSH map
uses a chooseleaf rule the remapped replica ends up under the bucket
that held the failed leaf.  This causes uneven data distribution across
the storage cluster, to the point that when all the leaves but one fail
under a particular bucket, that remaining leaf holds all the data from
its failed peers.

This behavior also limits the number of peers that can participate in
the re-replication of the data held by the failed leaf, which increases
the time required to re-replicate after a failure.

For a chooseleaf CRUSH rule, the tree descent has two steps: call them
the inner and outer descents.

If the tree descent down to <node_type> is the outer descent, and the
descent from <node_type> down to a leaf is the inner descent, the issue
is that a down leaf is detected on the inner descent, so only the inner
descent is retried.

In order to disperse re-replicated data as widely as possible across a
storage cluster after a failure, we want to retry the outer descent.
So, fix up crush_choose() to allow the inner descent to return
immediately on choosing a failed leaf.  Wire this up as a new CRUSH
tunable.

Note that after this change, for a chooseleaf rule, if the primary OSD
in a placement group has failed, choosing a replacement may result in
one of the other OSDs in the PG colliding with the new primary.  That
OSD's data for the PG then needs to be moved as well.  This seems
unavoidable but should be relatively rare.

This corresponds to ceph.git commit 88f218181a9e6d2292e2697fc93797d0f6d6e5dc.
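As an aside for readers following the logic: the sketch below is a
standalone, simplified model of the retry decision this tunable
introduces, mirroring the reject handling in the crush_choose() hunk
further down.  The function and enum names are invented for
illustration, and the remaining fallback branches are collapsed into a
single case; this is not the mapper code itself.

#include <stdio.h>

enum retry_action {
	RETRY_BUCKET,	/* retry within the current bucket */
	RETRY_DESCENT,	/* retry the (inner) descent */
	SKIP_REP,	/* give up; let the outer descent retry from the root */
};

static enum retry_action after_reject(int reject, int collide,
				      int descend_once,
				      int flocal, int choose_local_tries)
{
	if (reject && descend_once)
		return SKIP_REP;	/* new chooseleaf behaviour */
	if (collide && flocal <= choose_local_tries)
		return RETRY_BUCKET;	/* old local retry */
	return RETRY_DESCENT;
}

int main(void)
{
	/* a failed leaf with the tunable enabled bubbles up to the outer descent */
	printf("%d\n", after_reject(1, 0, 1, 1, 2) == SKIP_REP);
	/* with the tunable off, a collision is still retried locally */
	printf("%d\n", after_reject(0, 1, 0, 1, 2) == RETRY_BUCKET);
	return 0;
}

With descend_once set, a rejected (failed) leaf is no longer retried
within the same bucket; the failure is propagated so the outer,
root-level descent picks a different branch, which is exactly the
dispersal behaviour the message above describes.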
Signed-off-by: Jim Schutt Reviewed-by: Sage Weil --- include/linux/ceph/ceph_features.h | 7 +++++-- include/linux/crush/crush.h | 2 ++ net/ceph/crush/mapper.c | 13 ++++++++++--- net/ceph/osdmap.c | 6 ++++++ 4 files changed, 23 insertions(+), 5 deletions(-) (limited to 'net/ceph') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 6b7c6acbb3bf..2160aab482f6 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -14,7 +14,9 @@ #define CEPH_FEATURE_DIRLAYOUTHASH (1<<7) /* bits 8-17 defined by user-space; not supported yet here */ #define CEPH_FEATURE_CRUSH_TUNABLES (1<<18) -/* bits 19-25 defined by user-space; not supported yet here */ +/* bits 19-24 defined by user-space; not supported yet here */ +#define CEPH_FEATURE_CRUSH_TUNABLES2 (1<<25) +/* bit 26 defined by user-space; not supported yet here */ #define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27) /* @@ -22,7 +24,8 @@ */ #define CEPH_FEATURES_SUPPORTED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ - CEPH_FEATURE_CRUSH_TUNABLES | \ + CEPH_FEATURE_CRUSH_TUNABLES | \ + CEPH_FEATURE_CRUSH_TUNABLES2 | \ CEPH_FEATURE_REPLY_CREATE_INODE) #define CEPH_FEATURES_REQUIRED_DEFAULT \ diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 25baa287cff7..6a1101f24cfb 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -162,6 +162,8 @@ struct crush_map { __u32 choose_local_fallback_tries; /* choose attempts before giving up */ __u32 choose_total_tries; + /* attempt chooseleaf inner descent once; on failure retry outer descent */ + __u32 chooseleaf_descend_once; }; diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 35fce755ce10..96c8a58937db 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -287,6 +287,7 @@ static int is_out(const struct crush_map *map, const __u32 *weight, int item, in * @outpos: our position in that vector * @firstn: true if choosing "first n" items, false if choosing "indep" * @recurse_to_leaf: true if we want one device under each item of given type + * @descend_once: true if we should only try one descent before giving up * @out2: second output vector for leaf items (if @recurse_to_leaf) */ static int crush_choose(const struct crush_map *map, @@ -295,7 +296,7 @@ static int crush_choose(const struct crush_map *map, int x, int numrep, int type, int *out, int outpos, int firstn, int recurse_to_leaf, - int *out2) + int descend_once, int *out2) { int rep; unsigned int ftotal, flocal; @@ -399,6 +400,7 @@ static int crush_choose(const struct crush_map *map, x, outpos+1, 0, out2, outpos, firstn, 0, + map->chooseleaf_descend_once, NULL) <= outpos) /* didn't get leaf */ reject = 1; @@ -422,7 +424,10 @@ reject: ftotal++; flocal++; - if (collide && flocal <= map->choose_local_tries) + if (reject && descend_once) + /* let outer call try again */ + skip_rep = 1; + else if (collide && flocal <= map->choose_local_tries) /* retry locally a few times */ retry_bucket = 1; else if (map->choose_local_fallback_tries > 0 && @@ -485,6 +490,7 @@ int crush_do_rule(const struct crush_map *map, int i, j; int numrep; int firstn; + const int descend_once = 0; if ((__u32)ruleno >= map->max_rules) { dprintk(" bad ruleno %d\n", ruleno); @@ -544,7 +550,8 @@ int crush_do_rule(const struct crush_map *map, curstep->arg2, o+osize, j, firstn, - recurse_to_leaf, c+osize); + recurse_to_leaf, + descend_once, c+osize); } if (recurse_to_leaf) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index de73214b5d26..ca05871635bc 
100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -170,6 +170,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end) c->choose_local_tries = 2; c->choose_local_fallback_tries = 5; c->choose_total_tries = 19; + c->chooseleaf_descend_once = 0; ceph_decode_need(p, end, 4*sizeof(u32), bad); magic = ceph_decode_32(p); @@ -336,6 +337,11 @@ static struct crush_map *crush_decode(void *pbyval, void *end) dout("crush decode tunable choose_total_tries = %d", c->choose_total_tries); + ceph_decode_need(p, end, sizeof(u32), done); + c->chooseleaf_descend_once = ceph_decode_32(p); + dout("crush decode tunable chooseleaf_descend_once = %d", + c->chooseleaf_descend_once); + done: dout("crush_decode success\n"); return c; -- cgit v1.2.3 From 7d7c1f6136bac00174842f845babe7fb3483724e Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 15 Jan 2013 18:49:09 -0800 Subject: crush: avoid recursion if we have already collided This saves us some cycles, but does not affect the placement result at all. This corresponds to ceph.git commit 4abb53d4f. Signed-off-by: Sage Weil --- net/ceph/crush/mapper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ceph') diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 96c8a58937db..cbd06a91941c 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -392,7 +392,7 @@ static int crush_choose(const struct crush_map *map, } reject = 0; - if (recurse_to_leaf) { + if (!collide && recurse_to_leaf) { if (item < 0) { if (crush_choose(map, map->buckets[-1-item], -- cgit v1.2.3 From c3acb18196cf3d7d3db6a5121c1bc674c3fba31f Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 7 Dec 2012 09:57:58 -0600 Subject: libceph: reformat __reset_osd() Reformat __reset_osd() into three distinct blocks of code handling the three return cases. 
Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- net/ceph/osd_client.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) (limited to 'net/ceph') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 267f183b801a..eade41bb7102 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -747,31 +747,35 @@ static void remove_old_osds(struct ceph_osd_client *osdc) */ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) { - struct ceph_osd_request *req; - int ret = 0; + struct ceph_entity_addr *peer_addr; dout("__reset_osd %p osd%d\n", osd, osd->o_osd); if (list_empty(&osd->o_requests) && list_empty(&osd->o_linger_requests)) { __remove_osd(osdc, osd); - ret = -ENODEV; - } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd], - &osd->o_con.peer_addr, - sizeof(osd->o_con.peer_addr)) == 0 && - !ceph_con_opened(&osd->o_con)) { + + return -ENODEV; + } + + peer_addr = &osdc->osdmap->osd_addr[osd->o_osd]; + if (!memcmp(peer_addr, &osd->o_con.peer_addr, sizeof (*peer_addr)) && + !ceph_con_opened(&osd->o_con)) { + struct ceph_osd_request *req; + dout(" osd addr hasn't changed and connection never opened," " letting msgr retry"); /* touch each r_stamp for handle_timeout()'s benfit */ list_for_each_entry(req, &osd->o_requests, r_osd_item) req->r_stamp = jiffies; - ret = -EAGAIN; - } else { - ceph_con_close(&osd->o_con); - ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd, - &osdc->osdmap->osd_addr[osd->o_osd]); - osd->o_incarnation++; + + return -EAGAIN; } - return ret; + + ceph_con_close(&osd->o_con); + ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd, peer_addr); + osd->o_incarnation++; + + return 0; } static void __insert_osd(struct ceph_osd_client *osdc, struct ceph_osd *new) -- cgit v1.2.3 From af77f26caa35a95af09d1dac5c513b3901de7e37 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 9 Nov 2012 08:43:15 -0600 Subject: rbd: drop oid parameters from ceph_osdc_build_request() The last two parameters to ceph_osd_build_request() describe the object id, but the values passed always come from the osd request structure whose address is also provided. Get rid of those last two parameters. 
Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 6 +----- include/linux/ceph/osd_client.h | 4 +--- net/ceph/osd_client.c | 13 +++++-------- 3 files changed, 7 insertions(+), 16 deletions(-) (limited to 'net/ceph') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index d4e93a28fb6a..9a701effa0ef 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1176,11 +1176,7 @@ static int rbd_do_request(struct request *rq, snapid, ofs, &len, &bno, osd_req, ops); rbd_assert(ret == 0); - ceph_osdc_build_request(osd_req, ofs, &len, - ops, - snapc, - &mtime, - osd_req->r_oid, osd_req->r_oid_len); + ceph_osdc_build_request(osd_req, ofs, &len, ops, snapc, &mtime); if (linger_req) { ceph_osdc_set_request_linger(osdc, osd_req); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index d9b880e977e6..f2e5d2cdca06 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -227,9 +227,7 @@ extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, u64 *plen, struct ceph_osd_req_op *src_ops, struct ceph_snap_context *snapc, - struct timespec *mtime, - const char *oid, - int oid_len); + struct timespec *mtime); extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, struct ceph_file_layout *layout, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index eade41bb7102..7d38327a8e89 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -376,9 +376,7 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, u64 *plen, struct ceph_osd_req_op *src_ops, struct ceph_snap_context *snapc, - struct timespec *mtime, - const char *oid, - int oid_len) + struct timespec *mtime) { struct ceph_msg *msg = req->r_request; struct ceph_osd_request_head *head; @@ -405,9 +403,9 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, /* fill in oid */ - head->object_len = cpu_to_le32(oid_len); - memcpy(p, oid, oid_len); - p += oid_len; + head->object_len = cpu_to_le32(req->r_oid_len); + memcpy(p, req->r_oid, req->r_oid_len); + p += req->r_oid_len; src_op = src_ops; while (src_op->op) { @@ -506,8 +504,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, ceph_osdc_build_request(req, off, plen, ops, snapc, - mtime, - req->r_oid, req->r_oid_len); + mtime); return req; } -- cgit v1.2.3 From c885837f7d4f8c4f5cb2a744cc6929bc078e9dc0 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 13 Nov 2012 21:11:15 -0600 Subject: libceph: always allow trail in osd request An osd request structure contains an optional trail portion, which if present will contain data to be passed in the payload portion of the message containing the request. The trail field is a ceph_pagelist pointer, and if null it indicates there is no trail. A ceph_pagelist structure contains a length field, and it can legitimately hold value 0. Make use of this to change the interpretation of the "trail" of an osd request so that every osd request has trailing data, it just might have length 0. This means we change the r_trail field in a ceph_osd_request structure from a pointer to a structure that is always initialized. Note that in ceph_osdc_start_request(), the trail pointer (or now address of that structure) is assigned to a ceph message's trail field. Here's why that's still OK (looking at net/ceph/messenger.c): - What would have resulted in a null pointer previously will now refer to a 0-length page list. 
That message trail pointer is used in two functions, write_partial_msg_pages() and out_msg_pos_next(). - In write_partial_msg_pages(), a null page list pointer is handled the same as a message with 0-length trail, and both result in a "in_trail" variable set to false. The trail pointer is only used if in_trail is true. - The only other place the message trail pointer is used is out_msg_pos_next(). That function is only called by write_partial_msg_pages() and only touches the trail pointer if the in_trail value it is passed is true. Therefore a null ceph_msg->trail pointer is equivalent to a non-null pointer referring to a 0-length page list structure. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 4 ++-- net/ceph/osd_client.c | 43 ++++++++++++----------------------------- 2 files changed, 14 insertions(+), 33 deletions(-) (limited to 'net/ceph') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index f2e5d2cdca06..61562c792855 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -10,6 +10,7 @@ #include #include #include +#include /* * Maximum object name size @@ -22,7 +23,6 @@ struct ceph_snap_context; struct ceph_osd_request; struct ceph_osd_client; struct ceph_authorizer; -struct ceph_pagelist; /* * completion callback for async writepages @@ -95,7 +95,7 @@ struct ceph_osd_request { struct bio *r_bio; /* instead of pages */ #endif - struct ceph_pagelist *r_trail; /* trailing part of the data */ + struct ceph_pagelist r_trail; /* trailing part of the data */ }; struct ceph_osd_event { diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 7d38327a8e89..2be50d82ccbc 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -171,10 +171,7 @@ void ceph_osdc_release_request(struct kref *kref) bio_put(req->r_bio); #endif ceph_put_snap_context(req->r_snapc); - if (req->r_trail) { - ceph_pagelist_release(req->r_trail); - kfree(req->r_trail); - } + ceph_pagelist_release(&req->r_trail); if (req->r_mempool) mempool_free(req, req->r_osdc->req_mempool); else @@ -208,8 +205,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, { struct ceph_osd_request *req; struct ceph_msg *msg; - int needs_trail; - int num_op = get_num_ops(ops, &needs_trail); + int num_op = get_num_ops(ops, NULL); size_t msg_size = sizeof(struct ceph_osd_request_head); msg_size += num_op*sizeof(struct ceph_osd_op); @@ -252,15 +248,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, } req->r_reply = msg; - /* allocate space for the trailing data */ - if (needs_trail) { - req->r_trail = kmalloc(sizeof(struct ceph_pagelist), gfp_flags); - if (!req->r_trail) { - ceph_osdc_put_request(req); - return NULL; - } - ceph_pagelist_init(req->r_trail); - } + ceph_pagelist_init(&req->r_trail); /* create request message; allow space for oid */ msg_size += MAX_OBJ_NAME_SIZE; @@ -312,29 +300,25 @@ static void osd_req_encode_op(struct ceph_osd_request *req, case CEPH_OSD_OP_GETXATTR: case CEPH_OSD_OP_SETXATTR: case CEPH_OSD_OP_CMPXATTR: - BUG_ON(!req->r_trail); - dst->xattr.name_len = cpu_to_le32(src->xattr.name_len); dst->xattr.value_len = cpu_to_le32(src->xattr.value_len); dst->xattr.cmp_op = src->xattr.cmp_op; dst->xattr.cmp_mode = src->xattr.cmp_mode; - ceph_pagelist_append(req->r_trail, src->xattr.name, + ceph_pagelist_append(&req->r_trail, src->xattr.name, src->xattr.name_len); - ceph_pagelist_append(req->r_trail, src->xattr.val, + 
ceph_pagelist_append(&req->r_trail, src->xattr.val, src->xattr.value_len); break; case CEPH_OSD_OP_CALL: - BUG_ON(!req->r_trail); - dst->cls.class_len = src->cls.class_len; dst->cls.method_len = src->cls.method_len; dst->cls.indata_len = cpu_to_le32(src->cls.indata_len); - ceph_pagelist_append(req->r_trail, src->cls.class_name, + ceph_pagelist_append(&req->r_trail, src->cls.class_name, src->cls.class_len); - ceph_pagelist_append(req->r_trail, src->cls.method_name, + ceph_pagelist_append(&req->r_trail, src->cls.method_name, src->cls.method_len); - ceph_pagelist_append(req->r_trail, src->cls.indata, + ceph_pagelist_append(&req->r_trail, src->cls.indata, src->cls.indata_len); break; case CEPH_OSD_OP_ROLLBACK: @@ -347,11 +331,9 @@ static void osd_req_encode_op(struct ceph_osd_request *req, __le32 prot_ver = cpu_to_le32(src->watch.prot_ver); __le32 timeout = cpu_to_le32(src->watch.timeout); - BUG_ON(!req->r_trail); - - ceph_pagelist_append(req->r_trail, + ceph_pagelist_append(&req->r_trail, &prot_ver, sizeof(prot_ver)); - ceph_pagelist_append(req->r_trail, + ceph_pagelist_append(&req->r_trail, &timeout, sizeof(timeout)); } case CEPH_OSD_OP_NOTIFY_ACK: @@ -414,8 +396,7 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, op++; } - if (req->r_trail) - data_len += req->r_trail->length; + data_len += req->r_trail.length; if (snapc) { head->snap_seq = cpu_to_le64(snapc->seq); @@ -1715,7 +1696,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, #ifdef CONFIG_BLOCK req->r_request->bio = req->r_bio; #endif - req->r_request->trail = req->r_trail; + req->r_request->trail = &req->r_trail; register_request(osdc, req); -- cgit v1.2.3 From 5b9d1b1cd46aa6c8abf891f25c15aee31538da7e Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 13 Nov 2012 21:11:15 -0600 Subject: libceph: kill op_needs_trail() Since every osd message is now prepared to include trailing data, there's no need to check ahead of time whether any operations will make use of the trail portion of the message. We can drop the second argument to get_num_ops(), and as a result we can also get rid of op_needs_trail() which is no longer used. 
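Because the previous patch leaves every request with an embedded,
always-initialized trail, a short hedged sketch of the resulting usage
pattern may help.  This is kernel-context illustration built from the
real pagelist API, with an invented helper name and error handling
omitted; it is not an actual call site.

#include <linux/ceph/osd_client.h>
#include <linux/ceph/pagelist.h>

/* Illustration only: a zero-length trail behaves exactly like "no trail". */
static void trail_usage_sketch(struct ceph_osd_request *req,
			       const void *data, size_t len)
{
	ceph_pagelist_init(&req->r_trail);	/* length starts at 0 */

	if (len)	/* ops that need trailing data just append to it */
		ceph_pagelist_append(&req->r_trail, data, len);

	/* consumers key off the length rather than a NULL pointer */
	if (req->r_trail.length)
		pr_debug("trail carries %zu bytes\n", req->r_trail.length);
}

In ceph_osdc_alloc_request() the init happens unconditionally, which is
what lets op_needs_trail() and the extra get_num_ops() argument
disappear in the diff below.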
Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- net/ceph/osd_client.c | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) (limited to 'net/ceph') diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 2be50d82ccbc..37d43d5b828c 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -32,20 +32,6 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, static void __send_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req); -static int op_needs_trail(int op) -{ - switch (op) { - case CEPH_OSD_OP_GETXATTR: - case CEPH_OSD_OP_SETXATTR: - case CEPH_OSD_OP_CMPXATTR: - case CEPH_OSD_OP_CALL: - case CEPH_OSD_OP_NOTIFY: - return 1; - default: - return 0; - } -} - static int op_has_extent(int op) { return (op == CEPH_OSD_OP_READ || @@ -179,17 +165,12 @@ void ceph_osdc_release_request(struct kref *kref) } EXPORT_SYMBOL(ceph_osdc_release_request); -static int get_num_ops(struct ceph_osd_req_op *ops, int *needs_trail) +static int get_num_ops(struct ceph_osd_req_op *ops) { int i = 0; - if (needs_trail) - *needs_trail = 0; - while (ops[i].op) { - if (needs_trail && op_needs_trail(ops[i].op)) - *needs_trail = 1; + while (ops[i].op) i++; - } return i; } @@ -205,7 +186,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, { struct ceph_osd_request *req; struct ceph_msg *msg; - int num_op = get_num_ops(ops, NULL); + int num_op = get_num_ops(ops); size_t msg_size = sizeof(struct ceph_osd_request_head); msg_size += num_op*sizeof(struct ceph_osd_op); @@ -365,7 +346,7 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, struct ceph_osd_req_op *src_op; struct ceph_osd_op *op; void *p; - int num_op = get_num_ops(src_ops, NULL); + int num_op = get_num_ops(src_ops); size_t msg_size = sizeof(*head) + num_op*sizeof(*op); int flags = req->r_flags; u64 data_len = 0; -- cgit v1.2.3 From 0120be3c60d46d6d55f4bf7a3d654cc705eb0c54 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 14 Nov 2012 09:38:19 -0600 Subject: libceph: pass length to ceph_osdc_build_request() The len argument to ceph_osdc_build_request() is set up to be passed by address, but that function never updates its value so there's no need to do this. Tighten up the interface by passing the length directly. 
Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 2 +- include/linux/ceph/osd_client.h | 2 +- net/ceph/osd_client.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net/ceph') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 54bd9fc3ef7c..c1b135b6cb97 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1172,7 +1172,7 @@ static int rbd_do_request(struct request *rq, snapid, ofs, &len, &bno, osd_req, ops); rbd_assert(ret == 0); - ceph_osdc_build_request(osd_req, ofs, &len, ops, snapc, &mtime); + ceph_osdc_build_request(osd_req, ofs, len, ops, snapc, &mtime); if (linger_req) { ceph_osdc_set_request_linger(osdc, osd_req); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 61562c792855..4bfb4582439a 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -224,7 +224,7 @@ extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client * struct bio *bio); extern void ceph_osdc_build_request(struct ceph_osd_request *req, - u64 off, u64 *plen, + u64 off, u64 len, struct ceph_osd_req_op *src_ops, struct ceph_snap_context *snapc, struct timespec *mtime); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 37d43d5b828c..e29a3ed92958 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -336,7 +336,7 @@ static void osd_req_encode_op(struct ceph_osd_request *req, * */ void ceph_osdc_build_request(struct ceph_osd_request *req, - u64 off, u64 *plen, + u64 off, u64 len, struct ceph_osd_req_op *src_ops, struct ceph_snap_context *snapc, struct timespec *mtime) @@ -390,7 +390,7 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, if (flags & CEPH_OSD_FLAG_WRITE) { req->r_request->hdr.data_off = cpu_to_le16(off); - req->r_request->hdr.data_len = cpu_to_le32(*plen + data_len); + req->r_request->hdr.data_len = cpu_to_le32(len + data_len); } else if (data_len) { req->r_request->hdr.data_off = 0; req->r_request->hdr.data_len = cpu_to_le32(data_len); @@ -464,7 +464,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, req->r_num_pages = calc_pages_for(page_align, *plen); req->r_page_alignment = page_align; - ceph_osdc_build_request(req, off, plen, ops, + ceph_osdc_build_request(req, off, *plen, ops, snapc, mtime); -- cgit v1.2.3 From e8afad656cbcd06d02a7bacd4b318fa0e2907de0 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 14 Nov 2012 09:38:19 -0600 Subject: libceph: pass length to ceph_calc_file_object_mapping() ceph_calc_file_object_mapping() takes (among other things) a "file" offset and length, and based on the layout, determines the object number ("bno") backing the affected portion of the file's data and the offset into that object where the desired range begins. It also computes the size that should be used for the request--either the amount requested or something less if that would exceed the end of the object. This patch changes the input length parameter in this function so it is used only for input. That is, the argument will be passed by value rather than by address, so the value provided won't get updated by the function. The value would only get updated if the length would surpass the current object, and in that case the value it got updated to would be exactly that returned in *oxlen. Only one of the two callers is affected by this change. Update ceph_calc_raw_layout() so it records any updated value. 
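For readers who have not seen the layout math, here is a hedged
userspace restatement of the striping arithmetic that
ceph_calc_file_object_mapping() performs, with the sanity checks and
do_div() plumbing omitted; the helper name and the sample layout values
are invented for this example.

#include <stdint.h>
#include <stdio.h>

/* Illustrative restatement of the striping arithmetic; not the kernel code. */
static void map_file_extent(uint64_t off, uint64_t len,
			    uint32_t su, uint32_t sc, uint32_t osize,
			    uint64_t *ono, uint64_t *oxoff, uint64_t *oxlen)
{
	uint32_t su_per_object = osize / su;
	uint64_t stripe = off / su;		/* which stripe unit of the file */
	uint64_t stripepos = stripe % sc;	/* which object within the set */
	uint64_t stripeno = stripe / sc;	/* which "row" of stripe units */
	uint64_t objsetno = stripeno / su_per_object;
	uint64_t objsetpos = stripeno % su_per_object;
	uint64_t su_offset = off % su;

	*ono = objsetno * sc + stripepos;
	*oxoff = objsetpos * su + su_offset;
	/* only as much as fits in the current stripe unit */
	*oxlen = len < su - su_offset ? len : su - su_offset;
}

int main(void)
{
	uint64_t ono, oxoff, oxlen;

	/* sample layout: 4 KiB stripe unit, 3-wide stripe, 8 KiB objects */
	map_file_extent(20480, 1000, 4096, 3, 8192, &ono, &oxoff, &oxlen);
	printf("object %llu, offset %llu, length %llu\n",
	       (unsigned long long)ono, (unsigned long long)oxoff,
	       (unsigned long long)oxlen);	/* object 2, offset 4096, length 1000 */
	return 0;
}

Running it maps file offset 20480 of that sample layout to object 2 at
offset 4096 and clips the length to what remains of the current stripe
unit, which is the *oxlen behaviour the commit message refers to.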
Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- fs/ceph/ioctl.c | 2 +- include/linux/ceph/osdmap.h | 2 +- net/ceph/osd_client.c | 6 ++++-- net/ceph/osdmap.c | 9 ++++----- 4 files changed, 10 insertions(+), 9 deletions(-) (limited to 'net/ceph') diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 36549a46e311..3b22150d3e19 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -194,7 +194,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) return -EFAULT; down_read(&osdc->map_sem); - r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, &len, + r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len, &dl.object_no, &dl.object_offset, &olen); if (r < 0) diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 5ea57ba69320..1f653e2ff5cc 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -110,7 +110,7 @@ extern void ceph_osdmap_destroy(struct ceph_osdmap *map); /* calculate mapping of a file extent to an object */ extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, - u64 off, u64 *plen, + u64 off, u64 len, u64 *bno, u64 *oxoff, u64 *oxlen); /* calculate mapping of object to a placement group */ diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index e29a3ed92958..47e5f5b1f94c 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -53,13 +53,15 @@ int ceph_calc_raw_layout(struct ceph_osd_client *osdc, reqhead->snapid = cpu_to_le64(snapid); /* object extent? */ - r = ceph_calc_file_object_mapping(layout, off, plen, bno, + r = ceph_calc_file_object_mapping(layout, off, orig_len, bno, &objoff, &objlen); if (r < 0) return r; - if (*plen < orig_len) + if (objlen < orig_len) { + *plen = objlen; dout(" skipping last %llu, final file extent %llu~%llu\n", orig_len - *plen, off, *plen); + } if (op_has_extent(op->op)) { u32 osize = le32_to_cpu(layout->fl_object_size); diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index ca05871635bc..369f03ba9ee5 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1016,7 +1016,7 @@ bad: * pass a stride back to the caller. */ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, - u64 off, u64 *plen, + u64 off, u64 len, u64 *ono, u64 *oxoff, u64 *oxlen) { @@ -1027,7 +1027,7 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, u32 su_per_object; u64 t, su_offset; - dout("mapping %llu~%llu osize %u fl_su %u\n", off, *plen, + dout("mapping %llu~%llu osize %u fl_su %u\n", off, len, osize, su); if (su == 0 || sc == 0) goto invalid; @@ -1060,11 +1060,10 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, /* * Calculate the length of the extent being written to the selected - * object. This is the minimum of the full length requested (plen) or + * object. This is the minimum of the full length requested (len) or * the remainder of the current stripe being written to. */ - *oxlen = min_t(u64, *plen, su - su_offset); - *plen = *oxlen; + *oxlen = min_t(u64, len, su - su_offset); dout(" obj extent %llu~%llu\n", *oxoff, *oxlen); return 0; -- cgit v1.2.3 From 4d6b250bf18d44571d69a0f4afec4b6a1969729f Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 13 Nov 2012 21:11:15 -0600 Subject: libceph: drop snapid in ceph_calc_raw_layout() A snapshot id must be provided to ceph_calc_raw_layout() even though it is not needed at all for calculating the layout. Where the snapshot id *is* needed is when building the request message for an osd operation. 
Drop the snapid parameter from ceph_calc_raw_layout() and pass that value instead in ceph_osdc_build_request(). Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 4 ++-- include/linux/ceph/osd_client.h | 2 +- net/ceph/osd_client.c | 14 ++++---------- 3 files changed, 7 insertions(+), 13 deletions(-) (limited to 'net/ceph') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index c1b135b6cb97..fa371868e9b0 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1169,10 +1169,10 @@ static int rbd_do_request(struct request *rq, rbd_layout_init(&osd_req->r_file_layout, rbd_dev->spec->pool_id); ret = ceph_calc_raw_layout(osdc, &osd_req->r_file_layout, - snapid, ofs, &len, &bno, osd_req, ops); + ofs, &len, &bno, osd_req, ops); rbd_assert(ret == 0); - ceph_osdc_build_request(osd_req, ofs, len, ops, snapc, &mtime); + ceph_osdc_build_request(osd_req, ofs, len, ops, snapc, snapid, &mtime); if (linger_req) { ceph_osdc_set_request_linger(osdc, osd_req); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 4bfb4582439a..0e82a0a967ef 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -209,7 +209,6 @@ extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, extern int ceph_calc_raw_layout(struct ceph_osd_client *osdc, struct ceph_file_layout *layout, - u64 snapid, u64 off, u64 *plen, u64 *bno, struct ceph_osd_request *req, struct ceph_osd_req_op *op); @@ -227,6 +226,7 @@ extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, u64 len, struct ceph_osd_req_op *src_ops, struct ceph_snap_context *snapc, + u64 snap_id, struct timespec *mtime); extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 47e5f5b1f94c..b5a4b2875e8a 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -40,18 +40,14 @@ static int op_has_extent(int op) int ceph_calc_raw_layout(struct ceph_osd_client *osdc, struct ceph_file_layout *layout, - u64 snapid, u64 off, u64 *plen, u64 *bno, struct ceph_osd_request *req, struct ceph_osd_req_op *op) { - struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; u64 orig_len = *plen; u64 objoff, objlen; /* extent in object */ int r; - reqhead->snapid = cpu_to_le64(snapid); - /* object extent? 
*/ r = ceph_calc_file_object_mapping(layout, off, orig_len, bno, &objoff, &objlen); @@ -121,8 +117,7 @@ static int calc_layout(struct ceph_osd_client *osdc, u64 bno; int r; - r = ceph_calc_raw_layout(osdc, layout, vino.snap, off, - plen, &bno, req, op); + r = ceph_calc_raw_layout(osdc, layout, off, plen, &bno, req, op); if (r < 0) return r; @@ -340,7 +335,7 @@ static void osd_req_encode_op(struct ceph_osd_request *req, void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, u64 len, struct ceph_osd_req_op *src_ops, - struct ceph_snap_context *snapc, + struct ceph_snap_context *snapc, u64 snap_id, struct timespec *mtime) { struct ceph_msg *msg = req->r_request; @@ -355,6 +350,7 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, int i; head = msg->front.iov_base; + head->snapid = cpu_to_le64(snap_id); op = (void *)(head + 1); p = (void *)(op + num_op); @@ -466,9 +462,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, req->r_num_pages = calc_pages_for(page_align, *plen); req->r_page_alignment = page_align; - ceph_osdc_build_request(req, off, *plen, ops, - snapc, - mtime); + ceph_osdc_build_request(req, off, *plen, ops, snapc, vino.snap, mtime); return req; } -- cgit v1.2.3 From e75b45cf36565fd8ba206a9d80f670a86e61ba2f Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 13 Nov 2012 21:11:14 -0600 Subject: libceph: drop osdc from ceph_calc_raw_layout() The osdc parameter to ceph_calc_raw_layout() is not used, so get rid of it. Consequently, the corresponding parameter in calc_layout() becomes unused, so get rid of that as well. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 2 +- include/linux/ceph/osd_client.h | 3 +-- net/ceph/osd_client.c | 10 ++++------ 3 files changed, 6 insertions(+), 9 deletions(-) (limited to 'net/ceph') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index fa371868e9b0..ac8fd3856509 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1168,7 +1168,7 @@ static int rbd_do_request(struct request *rq, osd_req->r_oid_len = strlen(osd_req->r_oid); rbd_layout_init(&osd_req->r_file_layout, rbd_dev->spec->pool_id); - ret = ceph_calc_raw_layout(osdc, &osd_req->r_file_layout, + ret = ceph_calc_raw_layout(&osd_req->r_file_layout, ofs, &len, &bno, osd_req, ops); rbd_assert(ret == 0); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 0e82a0a967ef..fe3a6e8db1f9 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -207,8 +207,7 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg); -extern int ceph_calc_raw_layout(struct ceph_osd_client *osdc, - struct ceph_file_layout *layout, +extern int ceph_calc_raw_layout(struct ceph_file_layout *layout, u64 off, u64 *plen, u64 *bno, struct ceph_osd_request *req, struct ceph_osd_req_op *op); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index b5a4b2875e8a..cd9c28387de3 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -38,8 +38,7 @@ static int op_has_extent(int op) op == CEPH_OSD_OP_WRITE); } -int ceph_calc_raw_layout(struct ceph_osd_client *osdc, - struct ceph_file_layout *layout, +int ceph_calc_raw_layout(struct ceph_file_layout *layout, u64 off, u64 *plen, u64 *bno, struct ceph_osd_request *req, struct ceph_osd_req_op *op) @@ -107,8 +106,7 @@ EXPORT_SYMBOL(ceph_calc_raw_layout); * * fill osd op in request message. 
*/ -static int calc_layout(struct ceph_osd_client *osdc, - struct ceph_vino vino, +static int calc_layout(struct ceph_vino vino, struct ceph_file_layout *layout, u64 off, u64 *plen, struct ceph_osd_request *req, @@ -117,7 +115,7 @@ static int calc_layout(struct ceph_osd_client *osdc, u64 bno; int r; - r = ceph_calc_raw_layout(osdc, layout, off, plen, &bno, req, op); + r = ceph_calc_raw_layout(layout, off, plen, &bno, req, op); if (r < 0) return r; @@ -452,7 +450,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, return ERR_PTR(-ENOMEM); /* calculate max write size */ - r = calc_layout(osdc, vino, layout, off, plen, req, ops); + r = calc_layout(vino, layout, off, plen, req, ops); if (r < 0) return ERR_PTR(r); req->r_file_layout = *layout; /* keep a copy */ -- cgit v1.2.3 From d178a9e74006e80f568d87e29f2a68f14fc7cbb1 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 13 Nov 2012 21:11:15 -0600 Subject: libceph: don't set flags in ceph_osdc_alloc_request() The only thing ceph_osdc_alloc_request() really does with the flags value it is passed is assign it to the newly-created osd request structure. Do that in the caller instead. Both callers subsequently call ceph_osdc_build_request(), so have that function (instead of ceph_osdc_alloc_request()) issue a warning if a request comes through with neither the read nor write flags set. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 3 ++- include/linux/ceph/osd_client.h | 1 - net/ceph/osd_client.c | 11 ++++------- 3 files changed, 6 insertions(+), 9 deletions(-) (limited to 'net/ceph') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index ac8fd3856509..bdbaa4cfd9d3 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1148,13 +1148,14 @@ static int rbd_do_request(struct request *rq, (unsigned long long) len, coll, coll_index); osdc = &rbd_dev->rbd_client->client->osdc; - osd_req = ceph_osdc_alloc_request(osdc, flags, snapc, ops, + osd_req = ceph_osdc_alloc_request(osdc, snapc, ops, false, GFP_NOIO, pages, bio); if (!osd_req) { ret = -ENOMEM; goto done_pages; } + osd_req->r_flags = flags; osd_req->r_callback = rbd_cb; rbd_req->rq = rq; diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index fe3a6e8db1f9..6ddda5bbd1a6 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -213,7 +213,6 @@ extern int ceph_calc_raw_layout(struct ceph_file_layout *layout, struct ceph_osd_req_op *op); extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, - int flags, struct ceph_snap_context *snapc, struct ceph_osd_req_op *ops, bool use_mempool, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index cd9c28387de3..77ce1edaa07d 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -171,7 +171,6 @@ static int get_num_ops(struct ceph_osd_req_op *ops) } struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, - int flags, struct ceph_snap_context *snapc, struct ceph_osd_req_op *ops, bool use_mempool, @@ -208,10 +207,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, INIT_LIST_HEAD(&req->r_req_lru_item); INIT_LIST_HEAD(&req->r_osd_item); - req->r_flags = flags; - - WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); - /* create reply message */ if (use_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); @@ -347,6 +342,8 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, u64 data_len = 0; int i; + 
WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); + head = msg->front.iov_base; head->snapid = cpu_to_le64(snap_id); op = (void *)(head + 1); @@ -442,12 +439,12 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, } else ops[1].op = 0; - req = ceph_osdc_alloc_request(osdc, flags, - snapc, ops, + req = ceph_osdc_alloc_request(osdc, snapc, ops, use_mempool, GFP_NOFS, NULL, NULL); if (!req) return ERR_PTR(-ENOMEM); + req->r_flags = flags; /* calculate max write size */ r = calc_layout(vino, layout, off, plen, req, ops); -- cgit v1.2.3 From 54a5400721da7fa5a16cea151aade5bdfee74111 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 13 Nov 2012 21:11:15 -0600 Subject: libceph: don't set pages or bio in ceph_osdc_alloc_request() Only one of the two callers of ceph_osdc_alloc_request() provides page or bio data for its payload. And essentially all that function was doing with those arguments was assigning them to fields in the osd request structure. Simplify ceph_osdc_alloc_request() by having the caller take care of making those assignments Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 8 ++++++-- include/linux/ceph/osd_client.h | 4 +--- net/ceph/osd_client.c | 15 ++------------- 3 files changed, 9 insertions(+), 18 deletions(-) (limited to 'net/ceph') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index bdbaa4cfd9d3..d1445df6398a 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1148,14 +1148,18 @@ static int rbd_do_request(struct request *rq, (unsigned long long) len, coll, coll_index); osdc = &rbd_dev->rbd_client->client->osdc; - osd_req = ceph_osdc_alloc_request(osdc, snapc, ops, - false, GFP_NOIO, pages, bio); + osd_req = ceph_osdc_alloc_request(osdc, snapc, ops, false, GFP_NOIO); if (!osd_req) { ret = -ENOMEM; goto done_pages; } osd_req->r_flags = flags; + osd_req->r_pages = pages; + if (bio) { + osd_req->r_bio = bio; + bio_get(osd_req->r_bio); + } osd_req->r_callback = rbd_cb; rbd_req->rq = rq; diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 6ddda5bbd1a6..75f56d372d44 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -216,9 +216,7 @@ extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client * struct ceph_snap_context *snapc, struct ceph_osd_req_op *ops, bool use_mempool, - gfp_t gfp_flags, - struct page **pages, - struct bio *bio); + gfp_t gfp_flags); extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, u64 len, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 77ce1edaa07d..bdc3bb12bfd5 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -174,9 +174,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_snap_context *snapc, struct ceph_osd_req_op *ops, bool use_mempool, - gfp_t gfp_flags, - struct page **pages, - struct bio *bio) + gfp_t gfp_flags) { struct ceph_osd_request *req; struct ceph_msg *msg; @@ -237,13 +235,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, memset(msg->front.iov_base, 0, msg->front.iov_len); req->r_request = msg; - req->r_pages = pages; -#ifdef CONFIG_BLOCK - if (bio) { - req->r_bio = bio; - bio_get(req->r_bio); - } -#endif return req; } @@ -439,9 +430,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, } else ops[1].op = 0; - req = ceph_osdc_alloc_request(osdc, snapc, ops, - use_mempool, - GFP_NOFS, NULL, NULL); + req = 
ceph_osdc_alloc_request(osdc, snapc, ops, use_mempool, GFP_NOFS); if (!req) return ERR_PTR(-ENOMEM); req->r_flags = flags; -- cgit v1.2.3 From ae7ca4a35b1f5df86e2c32b2cfc01a8d528c7b8c Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 13 Nov 2012 21:11:15 -0600 Subject: libceph: pass num_op with ops Both ceph_osdc_alloc_request() and ceph_osdc_build_request() are provided an array of ceph osd request operations. Rather than just passing the number of operations in the array, the caller is required append an additional zeroed operation structure to signal the end of the array. All callers know the number of operations at the time these functions are called, so drop the silly zero entry and supply that number directly. As a result, get_num_ops() is no longer needed. This also means that ceph_osdc_alloc_request() never uses its ops argument, so that can be dropped. Also rbd_create_rw_ops() no longer needs to add one to reserve room for the additional op. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 9 +++++---- include/linux/ceph/osd_client.h | 3 ++- net/ceph/osd_client.c | 43 ++++++++++++++--------------------------- 3 files changed, 22 insertions(+), 33 deletions(-) (limited to 'net/ceph') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index b6872d3cb04c..88de8ccb29bd 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1026,12 +1026,12 @@ out_err: /* * helpers for osd request op vectors. */ -static struct ceph_osd_req_op *rbd_create_rw_ops(int num_ops, +static struct ceph_osd_req_op *rbd_create_rw_ops(int num_op, int opcode, u32 payload_len) { struct ceph_osd_req_op *ops; - ops = kzalloc(sizeof (*ops) * (num_ops + 1), GFP_NOIO); + ops = kzalloc(num_op * sizeof (*ops), GFP_NOIO); if (!ops) return NULL; @@ -1149,7 +1149,7 @@ static int rbd_do_request(struct request *rq, (unsigned long long) len, coll, coll_index); osdc = &rbd_dev->rbd_client->client->osdc; - osd_req = ceph_osdc_alloc_request(osdc, snapc, ops, false, GFP_NOIO); + osd_req = ceph_osdc_alloc_request(osdc, snapc, num_op, false, GFP_NOIO); if (!osd_req) { ret = -ENOMEM; goto done_pages; @@ -1178,7 +1178,8 @@ static int rbd_do_request(struct request *rq, ofs, &len, &bno, osd_req, ops); rbd_assert(ret == 0); - ceph_osdc_build_request(osd_req, ofs, len, ops, snapc, snapid, &mtime); + ceph_osdc_build_request(osd_req, ofs, len, num_op, ops, + snapc, snapid, &mtime); if (linger_req) { ceph_osdc_set_request_linger(osdc, osd_req); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 75f56d372d44..2b04d054e09d 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -214,12 +214,13 @@ extern int ceph_calc_raw_layout(struct ceph_file_layout *layout, extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_snap_context *snapc, - struct ceph_osd_req_op *ops, + unsigned int num_op, bool use_mempool, gfp_t gfp_flags); extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, u64 len, + unsigned int num_op, struct ceph_osd_req_op *src_ops, struct ceph_snap_context *snapc, u64 snap_id, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index bdc3bb12bfd5..500ae8b49321 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -160,25 +160,14 @@ void ceph_osdc_release_request(struct kref *kref) } EXPORT_SYMBOL(ceph_osdc_release_request); -static int get_num_ops(struct ceph_osd_req_op *ops) -{ - int i = 0; - - while (ops[i].op) - i++; - - return i; -} - struct 
ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_snap_context *snapc, - struct ceph_osd_req_op *ops, + unsigned int num_op, bool use_mempool, gfp_t gfp_flags) { struct ceph_osd_request *req; struct ceph_msg *msg; - int num_op = get_num_ops(ops); size_t msg_size = sizeof(struct ceph_osd_request_head); msg_size += num_op*sizeof(struct ceph_osd_op); @@ -317,7 +306,7 @@ static void osd_req_encode_op(struct ceph_osd_request *req, * */ void ceph_osdc_build_request(struct ceph_osd_request *req, - u64 off, u64 len, + u64 off, u64 len, unsigned int num_op, struct ceph_osd_req_op *src_ops, struct ceph_snap_context *snapc, u64 snap_id, struct timespec *mtime) @@ -327,7 +316,6 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, struct ceph_osd_req_op *src_op; struct ceph_osd_op *op; void *p; - int num_op = get_num_ops(src_ops); size_t msg_size = sizeof(*head) + num_op*sizeof(*op); int flags = req->r_flags; u64 data_len = 0; @@ -346,20 +334,17 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, head->flags = cpu_to_le32(flags); if (flags & CEPH_OSD_FLAG_WRITE) ceph_encode_timespec(&head->mtime, mtime); + BUG_ON(num_op > (unsigned int) ((u16) -1)); head->num_ops = cpu_to_le16(num_op); - /* fill in oid */ head->object_len = cpu_to_le32(req->r_oid_len); memcpy(p, req->r_oid, req->r_oid_len); p += req->r_oid_len; src_op = src_ops; - while (src_op->op) { - osd_req_encode_op(req, op, src_op); - src_op++; - op++; - } + while (num_op--) + osd_req_encode_op(req, op++, src_op++); data_len += req->r_trail.length; @@ -414,23 +399,24 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, bool use_mempool, int num_reply, int page_align) { - struct ceph_osd_req_op ops[3]; + struct ceph_osd_req_op ops[2]; struct ceph_osd_request *req; + unsigned int num_op = 1; int r; + memset(&ops, 0, sizeof ops); + ops[0].op = opcode; ops[0].extent.truncate_seq = truncate_seq; ops[0].extent.truncate_size = truncate_size; - ops[0].payload_len = 0; if (do_sync) { ops[1].op = CEPH_OSD_OP_STARTSYNC; - ops[1].payload_len = 0; - ops[2].op = 0; - } else - ops[1].op = 0; + num_op++; + } - req = ceph_osdc_alloc_request(osdc, snapc, ops, use_mempool, GFP_NOFS); + req = ceph_osdc_alloc_request(osdc, snapc, num_op, use_mempool, + GFP_NOFS); if (!req) return ERR_PTR(-ENOMEM); req->r_flags = flags; @@ -446,7 +432,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, req->r_num_pages = calc_pages_for(page_align, *plen); req->r_page_alignment = page_align; - ceph_osdc_build_request(req, off, *plen, ops, snapc, vino.snap, mtime); + ceph_osdc_build_request(req, off, *plen, num_op, ops, + snapc, vino.snap, mtime); return req; } -- cgit v1.2.3 From 1ec3911dbd19076bcdfe5540096ff67f91a6ec02 Mon Sep 17 00:00:00 2001 From: Cong Ding Date: Fri, 25 Jan 2013 17:48:59 -0600 Subject: libceph: fix undefined behavior when using snprintf() The variable "str" is used as both the source and destination in function snprintf(), which is undefined behavior based on C11. The original description in C11 is: "If copying takes place between objects that overlap, the behavior is undefined." And, the function of ceph_osdmap_state_str() is to return the osdmap state, so it should return "doesn't exist" when all the conditions are not satisfied. I fix it in this patch. 
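To make the undefined-behavior point concrete, here is a small hedged
userspace demonstration of the pattern being removed and one safe
alternative.  It is illustrative only; the kernel fix instead selects a
complete format string per state combination, as the diff below shows.

#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[32];

	snprintf(buf, sizeof(buf), "exists");

	/* The removed pattern: buf is both the destination and a source.
	 * C11 leaves copying between overlapping objects undefined, so this
	 * may appear to work on one libc and misbehave on another:
	 *
	 *	snprintf(buf, sizeof(buf), "%s%s", buf, ", up");
	 */

	/* One safe way to build the string incrementally: track the offset. */
	size_t used = strlen(buf);
	snprintf(buf + used, sizeof(buf) - used, ", up");
	puts(buf);	/* prints "exists, up" */
	return 0;
}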
[elder@inktank.com: shortened the commit message] Signed-off-by: Cong Ding Reviewed-by: Alex Elder --- net/ceph/osdmap.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) (limited to 'net/ceph') diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 369f03ba9ee5..3c61e21611d3 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -13,26 +13,18 @@ char *ceph_osdmap_state_str(char *str, int len, int state) { - int flag = 0; - if (!len) - goto done; - - *str = '\0'; - if (state) { - if (state & CEPH_OSD_EXISTS) { - snprintf(str, len, "exists"); - flag = 1; - } - if (state & CEPH_OSD_UP) { - snprintf(str, len, "%s%s%s", str, (flag ? ", " : ""), - "up"); - flag = 1; - } - } else { + return str; + + if ((state & CEPH_OSD_EXISTS) && (state & CEPH_OSD_UP)) + snprintf(str, len, "exists, up"); + else if (state & CEPH_OSD_EXISTS) + snprintf(str, len, "exists"); + else if (state & CEPH_OSD_UP) + snprintf(str, len, "up"); + else snprintf(str, len, "doesn't exist"); - } -done: + return str; } -- cgit v1.2.3