diff options
author | Ilya Dryomov <idryomov@gmail.com> | 2019-02-28 14:20:28 +0100 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2019-03-05 18:55:17 +0100 |
commit | 89a59c1ca73b8dd43c208cdbd3658bd302cd41b4 (patch) | |
tree | 43904f9dc9c6660cd7c1421ecaed9afbd3b45228 /drivers/block/rbd.c | |
parent | 3a482501cf701f56a454f9397aa96f477db87769 (diff) | |
download | lwn-89a59c1ca73b8dd43c208cdbd3658bd302cd41b4.tar.gz lwn-89a59c1ca73b8dd43c208cdbd3658bd302cd41b4.zip |
rbd: copyup with an empty snapshot context (aka deep-copyup)
This is the core of deep-flatten feature: sending a copyup request
(i.e. a guarded write of the data read from the parent) with an empty
snapshot context (snaps = [], seq = 0) causes the OSD to reflect the
write in all existing snapshots. This allows "rbd flatten" to fully
disconnect the clone image and its snapshots from the parent and make
the parent snapshot removable.
The actual modification request is sent only after deep-copyup request
is completed. Waiting for deep-copyup reply is unnecessary, this will
be improved in the future.
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Diffstat (limited to 'drivers/block/rbd.c')
-rw-r--r-- | drivers/block/rbd.c | 89 |
1 files changed, 79 insertions, 10 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index aa95227fdee2..4befd8f6ac9c 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -221,22 +221,32 @@ enum obj_operation_type { * Writes go through the following state machine to deal with * layering: * - * need copyup - * RBD_OBJ_WRITE_GUARD ---------------> RBD_OBJ_WRITE_COPYUP - * | ^ | - * v \------------------------------/ - * done - * ^ - * | - * RBD_OBJ_WRITE_FLAT + * . . . . . RBD_OBJ_WRITE_GUARD. . . . . . . . . . . . . . + * . | . + * . v . + * . RBD_OBJ_WRITE_READ_FROM_PARENT. . . . + * . | . . + * . v v (deep-copyup . + * (image . RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC . not needed) . + * flattened) v | . . + * . v . . + * . . . .RBD_OBJ_WRITE_COPYUP_OPS. . . . . (copyup . + * | not needed) v + * v . + * done . . . . . . . . . . . . . . . . . . + * ^ + * | + * RBD_OBJ_WRITE_FLAT * * Writes start in RBD_OBJ_WRITE_GUARD or _FLAT, depending on whether - * there is a parent or not. + * assert_exists guard is needed or not (in some cases it's not needed + * even if there is a parent). */ enum rbd_obj_write_state { RBD_OBJ_WRITE_FLAT = 1, RBD_OBJ_WRITE_GUARD, RBD_OBJ_WRITE_READ_FROM_PARENT, + RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC, RBD_OBJ_WRITE_COPYUP_OPS, }; @@ -422,6 +432,10 @@ static DEFINE_IDA(rbd_dev_id_ida); static struct workqueue_struct *rbd_wq; +static struct ceph_snap_context rbd_empty_snapc = { + .nref = REFCOUNT_INIT(1), +}; + /* * single-major requires >= 0.75 version of userspace rbd utility. */ @@ -2461,6 +2475,38 @@ static bool is_zero_bvecs(struct bio_vec *bvecs, u32 bytes) #define MODS_ONLY U32_MAX +static int rbd_obj_issue_copyup_empty_snapc(struct rbd_obj_request *obj_req, + u32 bytes) +{ + int ret; + + dout("%s obj_req %p bytes %u\n", __func__, obj_req, bytes); + rbd_assert(obj_req->osd_req->r_ops[0].op == CEPH_OSD_OP_STAT); + rbd_assert(bytes > 0 && bytes != MODS_ONLY); + rbd_osd_req_destroy(obj_req->osd_req); + + obj_req->osd_req = __rbd_osd_req_create(obj_req, &rbd_empty_snapc, 1); + if (!obj_req->osd_req) + return -ENOMEM; + + ret = osd_req_op_cls_init(obj_req->osd_req, 0, "rbd", "copyup"); + if (ret) + return ret; + + osd_req_op_cls_request_data_bvecs(obj_req->osd_req, 0, + obj_req->copyup_bvecs, + obj_req->copyup_bvec_count, + bytes); + rbd_osd_req_format_write(obj_req); + + ret = ceph_osdc_alloc_messages(obj_req->osd_req, GFP_NOIO); + if (ret) + return ret; + + rbd_obj_request_submit(obj_req); + return 0; +} + static int rbd_obj_issue_copyup_ops(struct rbd_obj_request *obj_req, u32 bytes) { struct rbd_img_request *img_req = obj_req->img_request; @@ -2469,7 +2515,8 @@ static int rbd_obj_issue_copyup_ops(struct rbd_obj_request *obj_req, u32 bytes) int ret; dout("%s obj_req %p bytes %u\n", __func__, obj_req, bytes); - rbd_assert(obj_req->osd_req->r_ops[0].op == CEPH_OSD_OP_STAT); + rbd_assert(obj_req->osd_req->r_ops[0].op == CEPH_OSD_OP_STAT || + obj_req->osd_req->r_ops[0].op == CEPH_OSD_OP_CALL); rbd_osd_req_destroy(obj_req->osd_req); switch (img_req->op_type) { @@ -2531,6 +2578,17 @@ static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes) bytes = 0; } + if (obj_req->img_request->snapc->num_snaps && bytes > 0) { + /* + * Send a copyup request with an empty snapshot context to + * deep-copyup the object through all existing snapshots. + * A second request with the current snapshot context will be + * sent for the actual modification. + */ + obj_req->write_state = RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC; + return rbd_obj_issue_copyup_empty_snapc(obj_req, bytes); + } + obj_req->write_state = RBD_OBJ_WRITE_COPYUP_OPS; return rbd_obj_issue_copyup_ops(obj_req, bytes); } @@ -2632,6 +2690,17 @@ static bool rbd_obj_handle_write(struct rbd_obj_request *obj_req) return true; } return false; + case RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC: + if (obj_req->result) + return true; + + obj_req->write_state = RBD_OBJ_WRITE_COPYUP_OPS; + ret = rbd_obj_issue_copyup_ops(obj_req, MODS_ONLY); + if (ret) { + obj_req->result = ret; + return true; + } + return false; default: BUG(); } |