Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1cf0209c authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull Ceph updates from Sage Weil:
 "A few groups of patches here.  Alex has been hard at work improving
  the RBD code, layout groundwork for understanding the new formats and
  doing layering.  Most of the infrastructure is now in place for the
  final bits that will come with the next window.

  There are a few changes to the data layout.  Jim Schutt's patch fixes
  some non-ideal CRUSH behavior, and a set of patches from me updates
  the client to speak a newer version of the protocol and implement an
  improved hashing strategy across storage nodes (when the server side
  supports it too).

  A pair of patches from Sam Lang fix the atomicity of open+create
  operations.  Several patches from Yan, Zheng fix various mds/client
  issues that turned up during multi-mds torture tests.

  A final set of patches expose file layouts via virtual xattrs, and
  allow the policies to be set on directories via xattrs as well
  (avoiding the awkward ioctl interface and providing a consistent
  interface for both kernel mount and ceph-fuse users)."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (143 commits)
  libceph: add support for HASHPSPOOL pool flag
  libceph: update osd request/reply encoding
  libceph: calculate placement based on the internal data types
  ceph: update support for PGID64, PGPOOL3, OSDENC protocol features
  ceph: update "ceph_features.h"
  libceph: decode into cpu-native ceph_pg type
  libceph: rename ceph_pg -> ceph_pg_v1
  rbd: pass length, not op for osd completions
  rbd: move rbd_osd_trivial_callback()
  libceph: use a do..while loop in con_work()
  libceph: use a flag to indicate a fault has occurred
  libceph: separate non-locked fault handling
  libceph: encapsulate connection backoff
  libceph: eliminate sparse warnings
  ceph: eliminate sparse warnings in fs code
  rbd: eliminate sparse warnings
  libceph: define connection flag helpers
  rbd: normalize dout() calls
  rbd: barriers are hard
  rbd: ignore zero-length requests
  ...
parents de1a2262 83ca14fd
Loading
Loading
Loading
Loading
+1161 −691

File changed.

Preview size limit exceeded, changes collapsed.

+9 −29
Original line number Diff line number Diff line
@@ -236,16 +236,10 @@ static int ceph_readpage(struct file *filp, struct page *page)
static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg)
{
	struct inode *inode = req->r_inode;
	struct ceph_osd_reply_head *replyhead;
	int rc, bytes;
	int rc = req->r_result;
	int bytes = le32_to_cpu(msg->hdr.data_len);
	int i;

	/* parse reply */
	replyhead = msg->front.iov_base;
	WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
	rc = le32_to_cpu(replyhead->result);
	bytes = le32_to_cpu(msg->hdr.data_len);

	dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes);

	/* unlock all pages, zeroing any data we didn't read */
@@ -315,7 +309,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
				    CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
				    NULL, 0,
				    ci->i_truncate_seq, ci->i_truncate_size,
				    NULL, false, 1, 0);
				    NULL, false, 0);
	if (IS_ERR(req))
		return PTR_ERR(req);

@@ -492,8 +486,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
				   &ci->i_layout, snapc,
				   page_off, len,
				   ci->i_truncate_seq, ci->i_truncate_size,
				   &inode->i_mtime,
				   &page, 1, 0, 0, true);
				   &inode->i_mtime, &page, 1);
	if (err < 0) {
		dout("writepage setting page/mapping error %d %p\n", err, page);
		SetPageError(page);
@@ -554,27 +547,18 @@ static void writepages_finish(struct ceph_osd_request *req,
			      struct ceph_msg *msg)
{
	struct inode *inode = req->r_inode;
	struct ceph_osd_reply_head *replyhead;
	struct ceph_osd_op *op;
	struct ceph_inode_info *ci = ceph_inode(inode);
	unsigned wrote;
	struct page *page;
	int i;
	struct ceph_snap_context *snapc = req->r_snapc;
	struct address_space *mapping = inode->i_mapping;
	__s32 rc = -EIO;
	u64 bytes = 0;
	int rc = req->r_result;
	u64 bytes = le64_to_cpu(req->r_request_ops[0].extent.length);
	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
	long writeback_stat;
	unsigned issued = ceph_caps_issued(ci);

	/* parse reply */
	replyhead = msg->front.iov_base;
	WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
	op = (void *)(replyhead + 1);
	rc = le32_to_cpu(replyhead->result);
	bytes = le64_to_cpu(op->extent.length);

	if (rc >= 0) {
		/*
		 * Assume we wrote the pages we originally sent.  The
@@ -741,8 +725,6 @@ retry:
		struct page *page;
		int want;
		u64 offset, len;
		struct ceph_osd_request_head *reqhead;
		struct ceph_osd_op *op;
		long writeback_stat;

		next = 0;
@@ -838,7 +820,7 @@ get_more_pages:
					    snapc, do_sync,
					    ci->i_truncate_seq,
					    ci->i_truncate_size,
					    &inode->i_mtime, true, 1, 0);
					    &inode->i_mtime, true, 0);

				if (IS_ERR(req)) {
					rc = PTR_ERR(req);
@@ -906,10 +888,8 @@ get_more_pages:

		/* revise final length, page count */
		req->r_num_pages = locked_pages;
		reqhead = req->r_request->front.iov_base;
		op = (void *)(reqhead + 1);
		op->extent.length = cpu_to_le64(len);
		op->payload_len = cpu_to_le32(len);
		req->r_request_ops[0].extent.length = cpu_to_le64(len);
		req->r_request_ops[0].payload_len = cpu_to_le32(len);
		req->r_request->hdr.data_len = cpu_to_le32(len);

		rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
+25 −7
Original line number Diff line number Diff line
@@ -611,8 +611,16 @@ retry:

	if (flags & CEPH_CAP_FLAG_AUTH)
		ci->i_auth_cap = cap;
	else if (ci->i_auth_cap == cap)
	else if (ci->i_auth_cap == cap) {
		ci->i_auth_cap = NULL;
		spin_lock(&mdsc->cap_dirty_lock);
		if (!list_empty(&ci->i_dirty_item)) {
			dout(" moving %p to cap_dirty_migrating\n", inode);
			list_move(&ci->i_dirty_item,
				  &mdsc->cap_dirty_migrating);
		}
		spin_unlock(&mdsc->cap_dirty_lock);
	}

	dout("add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n",
	     inode, ceph_vinop(inode), cap, ceph_cap_string(issued),
@@ -1460,7 +1468,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
	struct ceph_mds_client *mdsc = fsc->mdsc;
	struct inode *inode = &ci->vfs_inode;
	struct ceph_cap *cap;
	int file_wanted, used;
	int file_wanted, used, cap_used;
	int took_snap_rwsem = 0;             /* true if mdsc->snap_rwsem held */
	int issued, implemented, want, retain, revoking, flushing = 0;
	int mds = -1;   /* keep track of how far we've gone through i_caps list
@@ -1563,9 +1571,14 @@ retry_locked:

		/* NOTE: no side-effects allowed, until we take s_mutex */

		cap_used = used;
		if (ci->i_auth_cap && cap != ci->i_auth_cap)
			cap_used &= ~ci->i_auth_cap->issued;

		revoking = cap->implemented & ~cap->issued;
		dout(" mds%d cap %p issued %s implemented %s revoking %s\n",
		dout(" mds%d cap %p used %s issued %s implemented %s revoking %s\n",
		     cap->mds, cap, ceph_cap_string(cap->issued),
		     ceph_cap_string(cap_used),
		     ceph_cap_string(cap->implemented),
		     ceph_cap_string(revoking));

@@ -1593,7 +1606,7 @@ retry_locked:
		}

		/* completed revocation? going down and there are no caps? */
		if (revoking && (revoking & used) == 0) {
		if (revoking && (revoking & cap_used) == 0) {
			dout("completed revocation of %s\n",
			     ceph_cap_string(cap->implemented & ~cap->issued));
			goto ack;
@@ -1670,8 +1683,8 @@ ack:
		sent++;

		/* __send_cap drops i_ceph_lock */
		delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, want,
				      retain, flushing, NULL);
		delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, cap_used,
				      want, retain, flushing, NULL);
		goto retry; /* retake i_ceph_lock and restart our cap scan. */
	}

@@ -2417,7 +2430,9 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
		dout("mds wanted %s -> %s\n",
		     ceph_cap_string(le32_to_cpu(grant->wanted)),
		     ceph_cap_string(wanted));
		grant->wanted = cpu_to_le32(wanted);
		/* imported cap may not have correct mds_wanted */
		if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT)
			check_caps = 1;
	}

	cap->seq = seq;
@@ -2821,6 +2836,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
	dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
	     (unsigned)seq);

	if (op == CEPH_CAP_OP_IMPORT)
		ceph_add_cap_releases(mdsc, session);

	/* lookup ino */
	inode = ceph_find_inode(sb, vino);
	ci = ceph_inode(inode);
+7 −1
Original line number Diff line number Diff line
@@ -243,6 +243,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
	err = ceph_mdsc_do_request(mdsc,
				   (flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
				   req);
	if (err)
		goto out_err;

	err = ceph_handle_snapdir(req, dentry, err);
	if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
		err = ceph_handle_notrace_create(dir, dentry);
@@ -263,6 +266,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
		err = finish_no_open(file, dn);
	} else {
		dout("atomic_open finish_open on dn %p\n", dn);
		if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) {
			*opened |= FILE_CREATED;
		}
		err = finish_open(file, dentry, ceph_open, opened);
	}

@@ -535,7 +541,7 @@ more:
				    ci->i_snap_realm->cached_context,
				    do_sync,
				    ci->i_truncate_seq, ci->i_truncate_size,
				    &mtime, false, 2, page_align);
				    &mtime, false, page_align);
	if (IS_ERR(req))
		return PTR_ERR(req);

+2 −4
Original line number Diff line number Diff line
@@ -185,7 +185,6 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
		&ceph_sb_to_client(inode->i_sb)->client->osdc;
	u64 len = 1, olen;
	u64 tmp;
	struct ceph_object_layout ol;
	struct ceph_pg pgid;
	int r;

@@ -194,7 +193,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
		return -EFAULT;

	down_read(&osdc->map_sem);
	r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, &len,
	r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len,
					  &dl.object_no, &dl.object_offset,
					  &olen);
	if (r < 0)
@@ -209,10 +208,9 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)

	snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx",
		 ceph_ino(inode), dl.object_no);
	ceph_calc_object_layout(&ol, dl.object_name, &ci->i_layout,
	ceph_calc_object_layout(&pgid, dl.object_name, &ci->i_layout,
				osdc->osdmap);

	pgid = ol.ol_pgid;
	dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid);
	if (dl.osd >= 0) {
		struct ceph_entity_addr *a =
Loading