Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 57666509 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull ceph updates from Sage Weil:
 "The big item here is support for inline data for CephFS and for
  message signatures from Zheng.  There are also several bug fixes,
  including interrupted flock request handling, 0-length xattrs, mksnap,
  cached readdir results, and a message version compat field.  Finally
  there are several cleanups from Ilya, Dan, and Markus.

  Note that there is another series coming soon that fixes some bugs in
  the RBD 'lingering' requests, but it isn't quite ready yet"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (27 commits)
  ceph: fix setting empty extended attribute
  ceph: fix mksnap crash
  ceph: do_sync is never initialized
  libceph: fixup includes in pagelist.h
  ceph: support inline data feature
  ceph: flush inline version
  ceph: convert inline data to normal data before data write
  ceph: sync read inline data
  ceph: fetch inline data when getting Fcr cap refs
  ceph: use getattr request to fetch inline data
  ceph: add inline data to pagecache
  ceph: parse inline data in MClientReply and MClientCaps
  libceph: specify position of extent operation
  libceph: add CREATE osd operation support
  libceph: add SETXATTR/CMPXATTR osd operations support
  rbd: don't treat CEPH_OSD_OP_DELETE as extent op
  ceph: remove unused stringification macros
  libceph: require cephx message signature by default
  ceph: introduce global empty snap context
  ceph: message versioning fixes
  ...
parents 87c31b39 0aeff37a
Loading
Loading
Loading
Loading
+7 −4
Original line number Diff line number Diff line
@@ -2370,8 +2370,12 @@ static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request,
		opcode = CEPH_OSD_OP_READ;
	}

	osd_req_op_extent_init(osd_request, num_ops, opcode, offset, length,
				0, 0);
	if (opcode == CEPH_OSD_OP_DELETE)
		osd_req_op_init(osd_request, num_ops, opcode);
	else
		osd_req_op_extent_init(osd_request, num_ops, opcode,
				       offset, length, 0, 0);

	if (obj_request->type == OBJ_REQUEST_BIO)
		osd_req_op_extent_osd_data_bio(osd_request, num_ops,
					obj_request->bio_list, length);
@@ -3405,7 +3409,6 @@ static void rbd_handle_request(struct rbd_device *rbd_dev, struct request *rq)
	if (result)
		rbd_warn(rbd_dev, "%s %llx at %llx result %d",
			 obj_op_name(op_type), length, offset, result);
	if (snapc)
	ceph_put_snap_context(snapc);
	blk_end_request_all(rq, result);
}
+262 −11
Original line number Diff line number Diff line
@@ -192,17 +192,30 @@ static int readpage_nounlock(struct file *filp, struct page *page)
	struct ceph_osd_client *osdc =
		&ceph_inode_to_client(inode)->client->osdc;
	int err = 0;
	u64 off = page_offset(page);
	u64 len = PAGE_CACHE_SIZE;

	err = ceph_readpage_from_fscache(inode, page);
	if (off >= i_size_read(inode)) {
		zero_user_segment(page, err, PAGE_CACHE_SIZE);
		SetPageUptodate(page);
		return 0;
	}

	/*
	 * Uptodate inline data should have been added into page cache
	 * while getting Fcr caps.
	 */
	if (ci->i_inline_version != CEPH_INLINE_NONE)
		return -EINVAL;

	err = ceph_readpage_from_fscache(inode, page);
	if (err == 0)
		goto out;

	dout("readpage inode %p file %p page %p index %lu\n",
	     inode, filp, page, page->index);
	err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
				  (u64) page_offset(page), &len,
				  off, &len,
				  ci->i_truncate_seq, ci->i_truncate_size,
				  &page, 1, 0);
	if (err == -ENOENT)
@@ -319,7 +332,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
	     off, len);
	vino = ceph_vino(inode);
	req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len,
				    1, CEPH_OSD_OP_READ,
				    0, 1, CEPH_OSD_OP_READ,
				    CEPH_OSD_FLAG_READ, NULL,
				    ci->i_truncate_seq, ci->i_truncate_size,
				    false);
@@ -384,6 +397,9 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
	int rc = 0;
	int max = 0;

	if (ceph_inode(inode)->i_inline_version != CEPH_INLINE_NONE)
		return -EINVAL;

	rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
					 &nr_pages);

@@ -673,7 +689,7 @@ static int ceph_writepages_start(struct address_space *mapping,
	int rc = 0;
	unsigned wsize = 1 << inode->i_blkbits;
	struct ceph_osd_request *req = NULL;
	int do_sync;
	int do_sync = 0;
	u64 truncate_size, snap_size;
	u32 truncate_seq;

@@ -750,7 +766,6 @@ static int ceph_writepages_start(struct address_space *mapping,
	last_snapc = snapc;

	while (!done && index <= end) {
		int num_ops = do_sync ? 2 : 1;
		unsigned i;
		int first;
		pgoff_t next;
@@ -850,7 +865,8 @@ static int ceph_writepages_start(struct address_space *mapping,
				len = wsize;
				req = ceph_osdc_new_request(&fsc->client->osdc,
							&ci->i_layout, vino,
							offset, &len, num_ops,
							offset, &len, 0,
							do_sync ? 2 : 1,
							CEPH_OSD_OP_WRITE,
							CEPH_OSD_FLAG_WRITE |
							CEPH_OSD_FLAG_ONDISK,
@@ -862,6 +878,9 @@ static int ceph_writepages_start(struct address_space *mapping,
					break;
				}

				if (do_sync)
					osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);

				req->r_callback = writepages_finish;
				req->r_inode = inode;

@@ -1204,6 +1223,7 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
	struct inode *inode = file_inode(vma->vm_file);
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_file_info *fi = vma->vm_file->private_data;
	struct page *pinned_page = NULL;
	loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT;
	int want, got, ret;

@@ -1215,7 +1235,8 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
		want = CEPH_CAP_FILE_CACHE;
	while (1) {
		got = 0;
		ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
		ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want,
				    -1, &got, &pinned_page);
		if (ret == 0)
			break;
		if (ret != -ERESTARTSYS) {
@@ -1226,12 +1247,54 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
	dout("filemap_fault %p %llu~%zd got cap refs on %s\n",
	     inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got));

	if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
	    ci->i_inline_version == CEPH_INLINE_NONE)
		ret = filemap_fault(vma, vmf);
	else
		ret = -EAGAIN;

	dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n",
	     inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got), ret);
	if (pinned_page)
		page_cache_release(pinned_page);
	ceph_put_cap_refs(ci, got);

	if (ret != -EAGAIN)
		return ret;

	/* read inline data */
	if (off >= PAGE_CACHE_SIZE) {
		/* does not support inline data > PAGE_SIZE */
		ret = VM_FAULT_SIGBUS;
	} else {
		int ret1;
		struct address_space *mapping = inode->i_mapping;
		struct page *page = find_or_create_page(mapping, 0,
						mapping_gfp_mask(mapping) &
						~__GFP_FS);
		if (!page) {
			ret = VM_FAULT_OOM;
			goto out;
		}
		ret1 = __ceph_do_getattr(inode, page,
					 CEPH_STAT_CAP_INLINE_DATA, true);
		if (ret1 < 0 || off >= i_size_read(inode)) {
			unlock_page(page);
			page_cache_release(page);
			ret = VM_FAULT_SIGBUS;
			goto out;
		}
		if (ret1 < PAGE_CACHE_SIZE)
			zero_user_segment(page, ret1, PAGE_CACHE_SIZE);
		else
			flush_dcache_page(page);
		SetPageUptodate(page);
		vmf->page = page;
		ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED;
	}
out:
	dout("filemap_fault %p %llu~%zd read inline data ret %d\n",
	     inode, off, (size_t)PAGE_CACHE_SIZE, ret);
	return ret;
}

@@ -1250,6 +1313,19 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
	size_t len;
	int want, got, ret;

	if (ci->i_inline_version != CEPH_INLINE_NONE) {
		struct page *locked_page = NULL;
		if (off == 0) {
			lock_page(page);
			locked_page = page;
		}
		ret = ceph_uninline_data(vma->vm_file, locked_page);
		if (locked_page)
			unlock_page(locked_page);
		if (ret < 0)
			return VM_FAULT_SIGBUS;
	}

	if (off + PAGE_CACHE_SIZE <= size)
		len = PAGE_CACHE_SIZE;
	else
@@ -1263,7 +1339,8 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
		want = CEPH_CAP_FILE_BUFFER;
	while (1) {
		got = 0;
		ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, off + len);
		ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len,
				    &got, NULL);
		if (ret == 0)
			break;
		if (ret != -ERESTARTSYS) {
@@ -1297,11 +1374,13 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
			ret = VM_FAULT_SIGBUS;
	}
out:
	if (ret != VM_FAULT_LOCKED) {
	if (ret != VM_FAULT_LOCKED)
		unlock_page(page);
	} else {
	if (ret == VM_FAULT_LOCKED ||
	    ci->i_inline_version != CEPH_INLINE_NONE) {
		int dirty;
		spin_lock(&ci->i_ceph_lock);
		ci->i_inline_version = CEPH_INLINE_NONE;
		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
		spin_unlock(&ci->i_ceph_lock);
		if (dirty)
@@ -1315,6 +1394,178 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
	return ret;
}

void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
			   char	*data, size_t len)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;

	if (locked_page) {
		page = locked_page;
	} else {
		if (i_size_read(inode) == 0)
			return;
		page = find_or_create_page(mapping, 0,
					   mapping_gfp_mask(mapping) & ~__GFP_FS);
		if (!page)
			return;
		if (PageUptodate(page)) {
			unlock_page(page);
			page_cache_release(page);
			return;
		}
	}

	dout("fill_inline_data %p %llx.%llx len %lu locked_page %p\n",
	     inode, ceph_vinop(inode), len, locked_page);

	if (len > 0) {
		void *kaddr = kmap_atomic(page);
		memcpy(kaddr, data, len);
		kunmap_atomic(kaddr);
	}

	if (page != locked_page) {
		if (len < PAGE_CACHE_SIZE)
			zero_user_segment(page, len, PAGE_CACHE_SIZE);
		else
			flush_dcache_page(page);

		SetPageUptodate(page);
		unlock_page(page);
		page_cache_release(page);
	}
}

int ceph_uninline_data(struct file *filp, struct page *locked_page)
{
	struct inode *inode = file_inode(filp);
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
	struct ceph_osd_request *req;
	struct page *page = NULL;
	u64 len, inline_version;
	int err = 0;
	bool from_pagecache = false;

	spin_lock(&ci->i_ceph_lock);
	inline_version = ci->i_inline_version;
	spin_unlock(&ci->i_ceph_lock);

	dout("uninline_data %p %llx.%llx inline_version %llu\n",
	     inode, ceph_vinop(inode), inline_version);

	if (inline_version == 1 || /* initial version, no data */
	    inline_version == CEPH_INLINE_NONE)
		goto out;

	if (locked_page) {
		page = locked_page;
		WARN_ON(!PageUptodate(page));
	} else if (ceph_caps_issued(ci) &
		   (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) {
		page = find_get_page(inode->i_mapping, 0);
		if (page) {
			if (PageUptodate(page)) {
				from_pagecache = true;
				lock_page(page);
			} else {
				page_cache_release(page);
				page = NULL;
			}
		}
	}

	if (page) {
		len = i_size_read(inode);
		if (len > PAGE_CACHE_SIZE)
			len = PAGE_CACHE_SIZE;
	} else {
		page = __page_cache_alloc(GFP_NOFS);
		if (!page) {
			err = -ENOMEM;
			goto out;
		}
		err = __ceph_do_getattr(inode, page,
					CEPH_STAT_CAP_INLINE_DATA, true);
		if (err < 0) {
			/* no inline data */
			if (err == -ENODATA)
				err = 0;
			goto out;
		}
		len = err;
	}

	req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
				    ceph_vino(inode), 0, &len, 0, 1,
				    CEPH_OSD_OP_CREATE,
				    CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
				    ci->i_snap_realm->cached_context,
				    0, 0, false);
	if (IS_ERR(req)) {
		err = PTR_ERR(req);
		goto out;
	}

	ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime);
	err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
	if (!err)
		err = ceph_osdc_wait_request(&fsc->client->osdc, req);
	ceph_osdc_put_request(req);
	if (err < 0)
		goto out;

	req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
				    ceph_vino(inode), 0, &len, 1, 3,
				    CEPH_OSD_OP_WRITE,
				    CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
				    ci->i_snap_realm->cached_context,
				    ci->i_truncate_seq, ci->i_truncate_size,
				    false);
	if (IS_ERR(req)) {
		err = PTR_ERR(req);
		goto out;
	}

	osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false);

	err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR,
				    "inline_version", &inline_version,
				    sizeof(inline_version),
				    CEPH_OSD_CMPXATTR_OP_GT,
				    CEPH_OSD_CMPXATTR_MODE_U64);
	if (err)
		goto out_put;

	err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR,
				    "inline_version", &inline_version,
				    sizeof(inline_version), 0, 0);
	if (err)
		goto out_put;

	ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime);
	err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
	if (!err)
		err = ceph_osdc_wait_request(&fsc->client->osdc, req);
out_put:
	ceph_osdc_put_request(req);
	if (err == -ECANCELED)
		err = 0;
out:
	if (page && page != locked_page) {
		if (from_pagecache) {
			unlock_page(page);
			page_cache_release(page);
		} else
			__free_pages(page, 0);
	}

	dout("uninline_data %p %llx.%llx inline_version %llu = %d\n",
	     inode, ceph_vinop(inode), inline_version, err);
	return err;
}

static struct vm_operations_struct ceph_vmops = {
	.fault		= ceph_filemap_fault,
	.page_mkwrite	= ceph_page_mkwrite,
+102 −30
Original line number Diff line number Diff line
@@ -975,10 +975,12 @@ static int send_cap_msg(struct ceph_mds_session *session,
			kuid_t uid, kgid_t gid, umode_t mode,
			u64 xattr_version,
			struct ceph_buffer *xattrs_buf,
			u64 follows)
			u64 follows, bool inline_data)
{
	struct ceph_mds_caps *fc;
	struct ceph_msg *msg;
	void *p;
	size_t extra_len;

	dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s"
	     " seq %u/%u mseq %u follows %lld size %llu/%llu"
@@ -988,7 +990,10 @@ static int send_cap_msg(struct ceph_mds_session *session,
	     seq, issue_seq, mseq, follows, size, max_size,
	     xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0);

	msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS, false);
	/* flock buffer size + inline version + inline data size */
	extra_len = 4 + 8 + 4;
	msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc) + extra_len,
			   GFP_NOFS, false);
	if (!msg)
		return -ENOMEM;

@@ -1020,6 +1025,14 @@ static int send_cap_msg(struct ceph_mds_session *session,
	fc->gid = cpu_to_le32(from_kgid(&init_user_ns, gid));
	fc->mode = cpu_to_le32(mode);

	p = fc + 1;
	/* flock buffer size */
	ceph_encode_32(&p, 0);
	/* inline version */
	ceph_encode_64(&p, inline_data ? 0 : CEPH_INLINE_NONE);
	/* inline data size */
	ceph_encode_32(&p, 0);

	fc->xattr_version = cpu_to_le64(xattr_version);
	if (xattrs_buf) {
		msg->middle = ceph_buffer_get(xattrs_buf);
@@ -1126,6 +1139,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
	u64 flush_tid = 0;
	int i;
	int ret;
	bool inline_data;

	held = cap->issued | cap->implemented;
	revoking = cap->implemented & ~cap->issued;
@@ -1209,13 +1223,15 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
		xattr_version = ci->i_xattrs.version;
	}

	inline_data = ci->i_inline_version != CEPH_INLINE_NONE;

	spin_unlock(&ci->i_ceph_lock);

	ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
		op, keep, want, flushing, seq, flush_tid, issue_seq, mseq,
		size, max_size, &mtime, &atime, time_warp_seq,
		uid, gid, mode, xattr_version, xattr_blob,
		follows);
		follows, inline_data);
	if (ret < 0) {
		dout("error sending cap msg, must requeue %p\n", inode);
		delayed = 1;
@@ -1336,7 +1352,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci,
			     capsnap->time_warp_seq,
			     capsnap->uid, capsnap->gid, capsnap->mode,
			     capsnap->xattr_version, capsnap->xattr_blob,
			     capsnap->follows);
			     capsnap->follows, capsnap->inline_data);

		next_follows = capsnap->follows + 1;
		ceph_put_cap_snap(capsnap);
@@ -2057,15 +2073,17 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got)
 * requested from the MDS.
 */
static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
			    int *got, loff_t endoff, int *check_max, int *err)
			    loff_t endoff, int *got, struct page **pinned_page,
			    int *check_max, int *err)
{
	struct inode *inode = &ci->vfs_inode;
	int ret = 0;
	int have, implemented;
	int have, implemented, _got = 0;
	int file_wanted;

	dout("get_cap_refs %p need %s want %s\n", inode,
	     ceph_cap_string(need), ceph_cap_string(want));
again:
	spin_lock(&ci->i_ceph_lock);

	/* make sure file is actually open */
@@ -2075,7 +2093,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
		     ceph_cap_string(need), ceph_cap_string(file_wanted));
		*err = -EBADF;
		ret = 1;
		goto out;
		goto out_unlock;
	}

	/* finish pending truncate */
@@ -2095,7 +2113,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
				*check_max = 1;
				ret = 1;
			}
			goto out;
			goto out_unlock;
		}
		/*
		 * If a sync write is in progress, we must wait, so that we
@@ -2103,7 +2121,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
		 */
		if (__ceph_have_pending_cap_snap(ci)) {
			dout("get_cap_refs %p cap_snap_pending\n", inode);
			goto out;
			goto out_unlock;
		}
	}

@@ -2120,18 +2138,50 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
		     inode, ceph_cap_string(have), ceph_cap_string(not),
		     ceph_cap_string(revoking));
		if ((revoking & not) == 0) {
			*got = need | (have & want);
			__take_cap_refs(ci, *got);
			_got = need | (have & want);
			__take_cap_refs(ci, _got);
			ret = 1;
		}
	} else {
		dout("get_cap_refs %p have %s needed %s\n", inode,
		     ceph_cap_string(have), ceph_cap_string(need));
	}
out:
out_unlock:
	spin_unlock(&ci->i_ceph_lock);

	if (ci->i_inline_version != CEPH_INLINE_NONE &&
	    (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
	    i_size_read(inode) > 0) {
		int ret1;
		struct page *page = find_get_page(inode->i_mapping, 0);
		if (page) {
			if (PageUptodate(page)) {
				*pinned_page = page;
				goto out;
			}
			page_cache_release(page);
		}
		/*
		 * drop cap refs first because getattr while holding
		 * caps refs can cause deadlock.
		 */
		ceph_put_cap_refs(ci, _got);
		_got = 0;

		/* getattr request will bring inline data into page cache */
		ret1 = __ceph_do_getattr(inode, NULL,
					 CEPH_STAT_CAP_INLINE_DATA, true);
		if (ret1 >= 0) {
			ret = 0;
			goto again;
		}
		*err = ret1;
		ret = 1;
	}
out:
	dout("get_cap_refs %p ret %d got %s\n", inode,
	     ret, ceph_cap_string(*got));
	     ret, ceph_cap_string(_got));
	*got = _got;
	return ret;
}

@@ -2168,8 +2218,8 @@ static void check_max_size(struct inode *inode, loff_t endoff)
 * due to a small max_size, make sure we check_max_size (and possibly
 * ask the mds) so we don't get hung up indefinitely.
 */
int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, int *got,
		  loff_t endoff)
int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
		  loff_t endoff, int *got, struct page **pinned_page)
{
	int check_max, ret, err;

@@ -2179,8 +2229,8 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, int *got,
	check_max = 0;
	err = 0;
	ret = wait_event_interruptible(ci->i_cap_wq,
				       try_get_cap_refs(ci, need, want,
							got, endoff,
				       try_get_cap_refs(ci, need, want, endoff,
							got, pinned_page,
							&check_max, &err));
	if (err)
		ret = err;
@@ -2383,6 +2433,8 @@ static void invalidate_aliases(struct inode *inode)
static void handle_cap_grant(struct ceph_mds_client *mdsc,
			     struct inode *inode, struct ceph_mds_caps *grant,
			     void *snaptrace, int snaptrace_len,
			     u64 inline_version,
			     void *inline_data, int inline_len,
			     struct ceph_buffer *xattr_buf,
			     struct ceph_mds_session *session,
			     struct ceph_cap *cap, int issued)
@@ -2403,6 +2455,7 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
	bool queue_invalidate = false;
	bool queue_revalidate = false;
	bool deleted_inode = false;
	bool fill_inline = false;

	dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
	     inode, cap, mds, seq, ceph_cap_string(newcaps));
@@ -2576,6 +2629,13 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
	}
	BUG_ON(cap->issued & ~cap->implemented);

	if (inline_version > 0 && inline_version >= ci->i_inline_version) {
		ci->i_inline_version = inline_version;
		if (ci->i_inline_version != CEPH_INLINE_NONE &&
		    (newcaps & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)))
			fill_inline = true;
	}

	spin_unlock(&ci->i_ceph_lock);

	if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) {
@@ -2589,6 +2649,9 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
			wake = true;
	}

	if (fill_inline)
		ceph_fill_inline_data(inode, NULL, inline_data, inline_len);

	if (queue_trunc) {
		ceph_queue_vmtruncate(inode);
		ceph_queue_revalidate(inode);
@@ -2996,11 +3059,12 @@ void ceph_handle_caps(struct ceph_mds_session *session,
	u64 cap_id;
	u64 size, max_size;
	u64 tid;
	u64 inline_version = 0;
	void *inline_data = NULL;
	u32  inline_len = 0;
	void *snaptrace;
	size_t snaptrace_len;
	void *flock;
	void *end;
	u32 flock_len;
	void *p, *end;

	dout("handle_caps from mds%d\n", mds);

@@ -3021,30 +3085,37 @@ void ceph_handle_caps(struct ceph_mds_session *session,

	snaptrace = h + 1;
	snaptrace_len = le32_to_cpu(h->snap_trace_len);
	p = snaptrace + snaptrace_len;

	if (le16_to_cpu(msg->hdr.version) >= 2) {
		void *p = snaptrace + snaptrace_len;
		u32 flock_len;
		ceph_decode_32_safe(&p, end, flock_len, bad);
		if (p + flock_len > end)
			goto bad;
		flock = p;
	} else {
		flock = NULL;
		flock_len = 0;
		p += flock_len;
	}

	if (le16_to_cpu(msg->hdr.version) >= 3) {
		if (op == CEPH_CAP_OP_IMPORT) {
			void *p = flock + flock_len;
			if (p + sizeof(*peer) > end)
				goto bad;
			peer = p;
			p += sizeof(*peer);
		} else if (op == CEPH_CAP_OP_EXPORT) {
			/* recorded in unused fields */
			peer = (void *)&h->size;
		}
	}

	if (le16_to_cpu(msg->hdr.version) >= 4) {
		ceph_decode_64_safe(&p, end, inline_version, bad);
		ceph_decode_32_safe(&p, end, inline_len, bad);
		if (p + inline_len > end)
			goto bad;
		inline_data = p;
		p += inline_len;
	}

	/* lookup ino */
	inode = ceph_find_inode(sb, vino);
	ci = ceph_inode(inode);
@@ -3085,6 +3156,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
		handle_cap_import(mdsc, inode, h, peer, session,
				  &cap, &issued);
		handle_cap_grant(mdsc, inode, h,  snaptrace, snaptrace_len,
				 inline_version, inline_data, inline_len,
				 msg->middle, session, cap, issued);
		goto done_unlocked;
	}
@@ -3105,8 +3177,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
	case CEPH_CAP_OP_GRANT:
		__ceph_caps_issued(ci, &issued);
		issued |= __ceph_caps_dirty(ci);
		handle_cap_grant(mdsc, inode, h, NULL, 0, msg->middle,
				 session, cap, issued);
		handle_cap_grant(mdsc, inode, h, NULL, 0,
				 inline_version, inline_data, inline_len,
				 msg->middle, session, cap, issued);
		goto done_unlocked;

	case CEPH_CAP_OP_FLUSH_ACK:
@@ -3137,7 +3210,6 @@ void ceph_handle_caps(struct ceph_mds_session *session,
done:
	mutex_unlock(&session->s_mutex);
done_unlocked:
	if (inode)
	iput(inode);
	return;

+18 −9
Original line number Diff line number Diff line
@@ -183,7 +183,7 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx,
	spin_unlock(&parent->d_lock);

	/* make sure a dentry wasn't dropped while we didn't have parent lock */
	if (!ceph_dir_is_complete(dir)) {
	if (!ceph_dir_is_complete_ordered(dir)) {
		dout(" lost dir complete on %p; falling back to mds\n", dir);
		dput(dentry);
		err = -EAGAIN;
@@ -261,10 +261,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)

	/* always start with . and .. */
	if (ctx->pos == 0) {
		/* note dir version at start of readdir so we can tell
		 * if any dentries get dropped */
		fi->dir_release_count = atomic_read(&ci->i_release_count);

		dout("readdir off 0 -> '.'\n");
		if (!dir_emit(ctx, ".", 1, 
			    ceph_translate_ino(inode->i_sb, inode->i_ino),
@@ -289,7 +285,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
	if ((ctx->pos == 2 || fi->dentry) &&
	    !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
	    ceph_snap(inode) != CEPH_SNAPDIR &&
	    __ceph_dir_is_complete(ci) &&
	    __ceph_dir_is_complete_ordered(ci) &&
	    __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
		u32 shared_gen = ci->i_shared_gen;
		spin_unlock(&ci->i_ceph_lock);
@@ -312,6 +308,13 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)

	/* proceed with a normal readdir */

	if (ctx->pos == 2) {
		/* note dir version at start of readdir so we can tell
		 * if any dentries get dropped */
		fi->dir_release_count = atomic_read(&ci->i_release_count);
		fi->dir_ordered_count = ci->i_ordered_count;
	}

more:
	/* do we have the correct frag content buffered? */
	if (fi->frag != frag || fi->last_readdir == NULL) {
@@ -446,8 +449,12 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
	 */
	spin_lock(&ci->i_ceph_lock);
	if (atomic_read(&ci->i_release_count) == fi->dir_release_count) {
		if (ci->i_ordered_count == fi->dir_ordered_count)
			dout(" marking %p complete and ordered\n", inode);
		else
			dout(" marking %p complete\n", inode);
		__ceph_dir_set_complete(ci, fi->dir_release_count);
		__ceph_dir_set_complete(ci, fi->dir_release_count,
					fi->dir_ordered_count);
	}
	spin_unlock(&ci->i_ceph_lock);

@@ -805,7 +812,9 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
		acls.pagelist = NULL;
	}
	err = ceph_mdsc_do_request(mdsc, dir, req);
	if (!err && !req->r_reply_info.head->is_dentry)
	if (!err &&
	    !req->r_reply_info.head->is_target &&
	    !req->r_reply_info.head->is_dentry)
		err = ceph_handle_notrace_create(dir, dentry);
	ceph_mdsc_put_request(req);
out:
+83 −14

File changed.

Preview size limit exceeded, changes collapsed.

Loading