Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 79528734, authored by Alex Elder, committed by Sage Weil
Browse files

libceph: keep source rather than message osd op array

An osd request keeps a pointer to the osd operations (ops) array
that it builds in its request message.

In order to allow each op in the array to have its own distinct
data, we will need to keep track of each op's data, and that
information does not go over the wire.

As long as we're tracking the data we might as well just track the
entire (source) op definition for each of the ops.  And if we're
doing that, we'll have no more need to keep a pointer to the
wire-encoded version.

This patch makes the array of source ops be kept with the osd
request structure, and uses that instead of the version encoded in
the message in places where that was previously used.  The array
will be embedded in the request structure, and the maximum number of
ops we ever actually use is currently 2.  So reduce CEPH_OSD_MAX_OP
to 2 to reduce the size of the structure.

The effect of this change ripples back up through the callers, and as
a result various function parameters and local variables become unnecessary.

Make r_num_ops be unsigned, and move the definition of struct
ceph_osd_req_op earlier to ensure it's defined where needed.

This patch does not yet add per-op data; that's coming soon.

This resolves:
    http://tracker.ceph.com/issues/4656



Signed-off-by: Alex Elder <elder@inktank.com>
Reviewed-by: Josh Durgin <josh.durgin@inktank.com>
parent 430c28c3
Loading
Loading
Loading
Loading
+23 −19
Original line number Diff line number Diff line
@@ -1285,7 +1285,7 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
	 */
	obj_request->xferred = osd_req->r_reply_op_len[0];
	rbd_assert(obj_request->xferred < (u64) UINT_MAX);
	opcode = osd_req->r_request_ops[0].op;
	opcode = osd_req->r_ops[0].op;
	switch (opcode) {
	case CEPH_OSD_OP_READ:
		rbd_osd_read_callback(obj_request);
@@ -1312,8 +1312,7 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
}

static void rbd_osd_req_format_op(struct rbd_obj_request *obj_request,
					bool write_request,
					struct ceph_osd_req_op *op)
					bool write_request)
{
	struct rbd_img_request *img_request = obj_request->img_request;
	struct ceph_snap_context *snapc = NULL;
@@ -1333,7 +1332,7 @@ static void rbd_osd_req_format_op(struct rbd_obj_request *obj_request,
	}

	ceph_osdc_build_request(obj_request->osd_req, obj_request->offset,
			1, op, snapc, snap_id, mtime);
			snapc, snap_id, mtime);
}

static struct ceph_osd_request *rbd_osd_req_create(
@@ -1562,7 +1561,7 @@ static int rbd_img_request_fill_bio(struct rbd_img_request *img_request,
	while (resid) {
		const char *object_name;
		unsigned int clone_size;
		struct ceph_osd_req_op op;
		struct ceph_osd_req_op *op;
		u64 offset;
		u64 length;

@@ -1591,8 +1590,9 @@ static int rbd_img_request_fill_bio(struct rbd_img_request *img_request,
		if (!obj_request->osd_req)
			goto out_partial;

		osd_req_op_extent_init(&op, opcode, offset, length, 0, 0);
		rbd_osd_req_format_op(obj_request, write_request, &op);
		op = &obj_request->osd_req->r_ops[0];
		osd_req_op_extent_init(op, opcode, offset, length, 0, 0);
		rbd_osd_req_format_op(obj_request, write_request);

		/* status and version are initially zero-filled */

@@ -1694,7 +1694,7 @@ static int rbd_obj_notify_ack(struct rbd_device *rbd_dev,
				   u64 ver, u64 notify_id)
{
	struct rbd_obj_request *obj_request;
	struct ceph_osd_req_op op;
	struct ceph_osd_req_op *op;
	struct ceph_osd_client *osdc;
	int ret;

@@ -1708,8 +1708,9 @@ static int rbd_obj_notify_ack(struct rbd_device *rbd_dev,
	if (!obj_request->osd_req)
		goto out;

	osd_req_op_watch_init(&op, CEPH_OSD_OP_NOTIFY_ACK, notify_id, ver, 0);
	rbd_osd_req_format_op(obj_request, false, &op);
	op = &obj_request->osd_req->r_ops[0];
	osd_req_op_watch_init(op, CEPH_OSD_OP_NOTIFY_ACK, notify_id, ver, 0);
	rbd_osd_req_format_op(obj_request, false);

	osdc = &rbd_dev->rbd_client->client->osdc;
	obj_request->callback = rbd_obj_request_put;
@@ -1749,7 +1750,7 @@ static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start)
{
	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
	struct rbd_obj_request *obj_request;
	struct ceph_osd_req_op op;
	struct ceph_osd_req_op *op;
	int ret;

	rbd_assert(start ^ !!rbd_dev->watch_event);
@@ -1773,10 +1774,11 @@ static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start)
	if (!obj_request->osd_req)
		goto out_cancel;

	osd_req_op_watch_init(&op, CEPH_OSD_OP_WATCH,
	op = &obj_request->osd_req->r_ops[0];
	osd_req_op_watch_init(op, CEPH_OSD_OP_WATCH,
				rbd_dev->watch_event->cookie,
				rbd_dev->header.obj_version, start);
	rbd_osd_req_format_op(obj_request, true, &op);
	rbd_osd_req_format_op(obj_request, true);

	if (start)
		ceph_osdc_set_request_linger(osdc, obj_request->osd_req);
@@ -1836,7 +1838,7 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
{
	struct rbd_obj_request *obj_request;
	struct ceph_osd_client *osdc;
	struct ceph_osd_req_op op;
	struct ceph_osd_req_op *op;
	struct page **pages;
	u32 page_count;
	int ret;
@@ -1866,9 +1868,10 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
	if (!obj_request->osd_req)
		goto out;

	osd_req_op_cls_init(&op, CEPH_OSD_OP_CALL, class_name, method_name,
	op = &obj_request->osd_req->r_ops[0];
	osd_req_op_cls_init(op, CEPH_OSD_OP_CALL, class_name, method_name,
					outbound, outbound_size);
	rbd_osd_req_format_op(obj_request, false, &op);
	rbd_osd_req_format_op(obj_request, false);

	osdc = &rbd_dev->rbd_client->client->osdc;
	ret = rbd_obj_request_submit(osdc, obj_request);
@@ -2046,8 +2049,8 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
				char *buf, u64 *version)

{
	struct ceph_osd_req_op op;
	struct rbd_obj_request *obj_request;
	struct ceph_osd_req_op *op;
	struct ceph_osd_client *osdc;
	struct page **pages = NULL;
	u32 page_count;
@@ -2072,8 +2075,9 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
	if (!obj_request->osd_req)
		goto out;

	osd_req_op_extent_init(&op, CEPH_OSD_OP_READ, offset, length, 0, 0);
	rbd_osd_req_format_op(obj_request, false, &op);
	op = &obj_request->osd_req->r_ops[0];
	osd_req_op_extent_init(op, CEPH_OSD_OP_READ, offset, length, 0, 0);
	rbd_osd_req_format_op(obj_request, false);

	osdc = &rbd_dev->rbd_client->client->osdc;
	ret = rbd_obj_request_submit(osdc, obj_request);
+9 −12
Original line number Diff line number Diff line
@@ -288,7 +288,6 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
	struct page *page = list_entry(page_list->prev, struct page, lru);
	struct ceph_vino vino;
	struct ceph_osd_request *req;
	struct ceph_osd_req_op op;
	u64 off;
	u64 len;
	int i;
@@ -314,7 +313,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
	     off, len);
	vino = ceph_vino(inode);
	req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len,
				    1, &op, CEPH_OSD_OP_READ,
				    1, CEPH_OSD_OP_READ,
				    CEPH_OSD_FLAG_READ, NULL,
				    ci->i_truncate_seq, ci->i_truncate_size,
				    false);
@@ -349,7 +348,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
	req->r_callback = finish_read;
	req->r_inode = inode;

	ceph_osdc_build_request(req, off, 1, &op, NULL, vino.snap, NULL);
	ceph_osdc_build_request(req, off, NULL, vino.snap, NULL);

	dout("start_read %p starting %p %lld~%lld\n", inode, req, off, len);
	ret = ceph_osdc_start_request(osdc, req, false);
@@ -567,7 +566,7 @@ static void writepages_finish(struct ceph_osd_request *req,
	struct ceph_snap_context *snapc = req->r_snapc;
	struct address_space *mapping = inode->i_mapping;
	int rc = req->r_result;
	u64 bytes = le64_to_cpu(req->r_request_ops[0].extent.length);
	u64 bytes = req->r_ops[0].extent.length;
	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
	long writeback_stat;
	unsigned issued = ceph_caps_issued(ci);
@@ -635,8 +634,7 @@ static void writepages_finish(struct ceph_osd_request *req,

static struct ceph_osd_request *
ceph_writepages_osd_request(struct inode *inode, u64 offset, u64 *len,
				struct ceph_snap_context *snapc,
				int num_ops, struct ceph_osd_req_op *ops)
				struct ceph_snap_context *snapc, int num_ops)
{
	struct ceph_fs_client *fsc;
	struct ceph_inode_info *ci;
@@ -648,7 +646,7 @@ ceph_writepages_osd_request(struct inode *inode, u64 offset, u64 *len,
	/* BUG_ON(vino.snap != CEPH_NOSNAP); */

	return ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
			vino, offset, len, num_ops, ops, CEPH_OSD_OP_WRITE,
			vino, offset, len, num_ops, CEPH_OSD_OP_WRITE,
			CEPH_OSD_FLAG_WRITE|CEPH_OSD_FLAG_ONDISK,
			snapc, ci->i_truncate_seq, ci->i_truncate_size, true);
}
@@ -738,7 +736,6 @@ static int ceph_writepages_start(struct address_space *mapping,
	last_snapc = snapc;

	while (!done && index <= end) {
		struct ceph_osd_req_op ops[2];
		int num_ops = do_sync ? 2 : 1;
		struct ceph_vino vino;
		unsigned i;
@@ -846,7 +843,7 @@ static int ceph_writepages_start(struct address_space *mapping,
				len = wsize;
				req = ceph_writepages_osd_request(inode,
							offset, &len, snapc,
							num_ops, ops);
							num_ops);

				if (IS_ERR(req)) {
					rc = PTR_ERR(req);
@@ -927,11 +924,11 @@ static int ceph_writepages_start(struct address_space *mapping,

		/* Update the write op length in case we changed it */

		osd_req_op_extent_update(&ops[0], len);
		osd_req_op_extent_update(&req->r_ops[0], len);

		vino = ceph_vino(inode);
		ceph_osdc_build_request(req, offset, num_ops, ops,
					snapc, vino.snap, &inode->i_mtime);
		ceph_osdc_build_request(req, offset, snapc, vino.snap,
					&inode->i_mtime);

		rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
		BUG_ON(rc);
+2 −4
Original line number Diff line number Diff line
@@ -478,7 +478,6 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
	struct ceph_snap_context *snapc;
	struct ceph_vino vino;
	struct ceph_osd_request *req;
	struct ceph_osd_req_op ops[2];
	int num_ops = 1;
	struct page **pages;
	int num_pages;
@@ -534,7 +533,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
	snapc = ci->i_snap_realm->cached_context;
	vino = ceph_vino(inode);
	req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
				    vino, pos, &len, num_ops, ops,
				    vino, pos, &len, num_ops,
				    CEPH_OSD_OP_WRITE, flags, snapc,
				    ci->i_truncate_seq, ci->i_truncate_size,
				    false);
@@ -579,8 +578,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
					false, own_pages);

	/* BUG_ON(vino.snap != CEPH_NOSNAP); */
	ceph_osdc_build_request(req, pos, num_ops, ops,
				snapc, vino.snap, &mtime);
	ceph_osdc_build_request(req, pos, snapc, vino.snap, &mtime);

	ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
	if (!ret) {
+34 −36
Original line number Diff line number Diff line
@@ -48,7 +48,7 @@ struct ceph_osd {
};


#define CEPH_OSD_MAX_OP 10
#define CEPH_OSD_MAX_OP	2

enum ceph_osd_data_type {
	CEPH_OSD_DATA_TYPE_NONE,
@@ -79,6 +79,34 @@ struct ceph_osd_data {
	};
};

/*
 * Source (in-memory) description of a single osd operation.
 *
 * This is the form callers fill in (e.g. via osd_req_op_extent_init(),
 * osd_req_op_watch_init() or osd_req_op_cls_init()) before the request
 * message is built.  Users of this structure read its fields directly,
 * without the le16_to_cpu()/le64_to_cpu() conversions needed for the
 * wire-encoded struct ceph_osd_op.
 *
 * Which union member is meaningful depends on the opcode in @op.
 */
struct ceph_osd_req_op {
	u16 op;           /* CEPH_OSD_OP_* */
	u32 payload_len;  /* NOTE(review): presumably bytes of outbound data
			   * carried for this op -- confirm against the
			   * request-building code */
	union {
		/* object extent ops, e.g. CEPH_OSD_OP_READ / CEPH_OSD_OP_WRITE */
		struct {
			u64 offset, length;
			u64 truncate_size;
			u32 truncate_seq;
		} extent;
		/* object class method call, CEPH_OSD_OP_CALL */
		struct {
			const char *class_name;
			const char *method_name;
			const void *indata;
			u32 indata_len;
			/* presumably the lengths of class_name and
			 * method_name -- TODO confirm */
			__u8 class_len;
			__u8 method_len;
			__u8 argc;
		} cls;
		/* watch/notify ops, e.g. CEPH_OSD_OP_WATCH, CEPH_OSD_OP_NOTIFY_ACK */
		struct {
			u64 cookie;
			u64 ver;
			u32 prot_ver;
			u32 timeout;
			__u8 flag;
		} watch;
	};
};

/* an in-flight request */
struct ceph_osd_request {
	u64             r_tid;              /* unique for this client */
@@ -95,10 +123,11 @@ struct ceph_osd_request {
	struct ceph_msg  *r_request, *r_reply;
	int               r_flags;     /* any additional flags for the osd */
	u32               r_sent;      /* >0 if r_request is sending/sent */
	int               r_num_ops;

	/* encoded message content */
	struct ceph_osd_op *r_request_ops;
	/* request osd ops array  */
	unsigned int		r_num_ops;
	struct ceph_osd_req_op	r_ops[CEPH_OSD_MAX_OP];

	/* these are updated on each send */
	__le32           *r_request_osdmap_epoch;
	__le32           *r_request_flags;
@@ -193,34 +222,6 @@ struct ceph_osd_client {
	struct workqueue_struct	*notify_wq;
};

/*
 * Source (in-memory) description of a single osd operation, as filled
 * in by callers before the request message is built.  Fields are read
 * directly, without the byte-order conversions needed for the
 * wire-encoded struct ceph_osd_op.
 *
 * Which union member is meaningful depends on the opcode in @op.
 */
struct ceph_osd_req_op {
	u16 op;           /* CEPH_OSD_OP_* */
	u32 payload_len;  /* NOTE(review): presumably bytes of outbound data
			   * carried for this op -- confirm */
	union {
		/* object extent ops, e.g. CEPH_OSD_OP_READ / CEPH_OSD_OP_WRITE */
		struct {
			u64 offset, length;
			u64 truncate_size;
			u32 truncate_seq;
		} extent;
		/* object class method call, CEPH_OSD_OP_CALL */
		struct {
			const char *class_name;
			const char *method_name;
			const void *indata;
			u32 indata_len;
			/* presumably the lengths of class_name and
			 * method_name -- TODO confirm */
			__u8 class_len;
			__u8 method_len;
			__u8 argc;
		} cls;
		/* watch/notify ops, e.g. CEPH_OSD_OP_WATCH, CEPH_OSD_OP_NOTIFY_ACK */
		struct {
			u64 cookie;
			u64 ver;
			u32 prot_ver;
			u32 timeout;
			__u8 flag;
		} watch;
	};
};

extern int ceph_osdc_init(struct ceph_osd_client *osdc,
			  struct ceph_client *client);
extern void ceph_osdc_stop(struct ceph_osd_client *osdc);
@@ -249,8 +250,6 @@ extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *
					       gfp_t gfp_flags);

extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off,
				    unsigned int num_ops,
				    struct ceph_osd_req_op *src_ops,
				    struct ceph_snap_context *snapc,
				    u64 snap_id,
				    struct timespec *mtime);
@@ -259,8 +258,7 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
				      struct ceph_file_layout *layout,
				      struct ceph_vino vino,
				      u64 offset, u64 *len,
				      int num_ops, struct ceph_osd_req_op *ops,
				      int opcode, int flags,
				      int num_ops, int opcode, int flags,
				      struct ceph_snap_context *snapc,
				      u32 truncate_seq, u64 truncate_size,
				      bool use_mempool);
+2 −2
Original line number Diff line number Diff line
@@ -123,8 +123,8 @@ static int osdc_show(struct seq_file *s, void *pp)
	mutex_lock(&osdc->request_mutex);
	for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
		struct ceph_osd_request *req;
		unsigned int i;
		int opcode;
		int i;

		req = rb_entry(p, struct ceph_osd_request, r_node);

@@ -142,7 +142,7 @@ static int osdc_show(struct seq_file *s, void *pp)
			seq_printf(s, "\t");

		for (i = 0; i < req->r_num_ops; i++) {
			opcode = le16_to_cpu(req->r_request_ops[i].op);
			opcode = req->r_ops[i].op;
			seq_printf(s, "\t%s", ceph_osd_op_name(opcode));
		}

Loading