Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit dc594c39 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'ceph-for-4.18-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "The main piece is a set of libceph changes that revamps how OSD
  requests are aborted, improving CephFS ENOSPC handling and making
  "umount -f" actually work (Zheng and myself).

  The rest is mostly mount option handling cleanups from Chengguang and
  assorted fixes from Zheng, Luis and Dongsheng.

* tag 'ceph-for-4.18-rc1' of git://github.com/ceph/ceph-client: (31 commits)
  rbd: flush rbd_dev->watch_dwork after watch is unregistered
  ceph: update description of some mount options
  ceph: show ino32 if the value is different with default
  ceph: strengthen rsize/wsize/readdir_max_bytes validation
  ceph: fix alignment of rasize
  ceph: fix use-after-free in ceph_statfs()
  ceph: prevent i_version from going back
  ceph: fix wrong check for the case of updating link count
  libceph: allocate the locator string with GFP_NOFAIL
  libceph: make abort_on_full a per-osdc setting
  libceph: don't abort reads in ceph_osdc_abort_on_full()
  libceph: avoid a use-after-free during map check
  libceph: don't warn if req->r_abort_on_full is set
  libceph: use for_each_request() in ceph_osdc_abort_on_full()
  libceph: defer __complete_request() to a workqueue
  libceph: move more code into __complete_request()
  libceph: no need to call flush_workqueue() before destruction
  ceph: flush pending works before shutdown super
  ceph: abort osd requests on force umount
  libceph: introduce ceph_osdc_abort_requests()
  ...
parents e7655d2b 23edca86
Loading
Loading
Loading
Loading
+3 −5
Original line number Diff line number Diff line
@@ -105,15 +105,13 @@ Mount Options
	address its connection to the monitor originates from.

  wsize=X
	Specify the maximum write size in bytes.  By default there is no
	maximum.  Ceph will normally size writes based on the file stripe
	size.
	Specify the maximum write size in bytes.  Default: 16 MB.

  rsize=X
	Specify the maximum read size in bytes.  Default: 64 MB.
	Specify the maximum read size in bytes.  Default: 16 MB.

  rasize=X
	Specify the maximum readahead.  Default: 8 MB.
	Specify the maximum readahead size in bytes.  Default: 8 MB.

  mount_timeout=X
	Specify the timeout value for mount (in seconds), in the case
+7 −4
Original line number Diff line number Diff line
@@ -2339,6 +2339,7 @@ static bool is_zero_bvecs(struct bio_vec *bvecs, u32 bytes)
static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
{
	unsigned int num_osd_ops = obj_req->osd_req->r_num_ops;
	int ret;

	dout("%s obj_req %p bytes %u\n", __func__, obj_req, bytes);
	rbd_assert(obj_req->osd_req->r_ops[0].op == CEPH_OSD_OP_STAT);
@@ -2353,6 +2354,11 @@ static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
	if (!obj_req->osd_req)
		return -ENOMEM;

	ret = osd_req_op_cls_init(obj_req->osd_req, 0, CEPH_OSD_OP_CALL, "rbd",
				  "copyup");
	if (ret)
		return ret;

	/*
	 * Only send non-zero copyup data to save some I/O and network
	 * bandwidth -- zero copyup data is equivalent to the object not
@@ -2362,9 +2368,6 @@ static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
		dout("%s obj_req %p detected zeroes\n", __func__, obj_req);
		bytes = 0;
	}

	osd_req_op_cls_init(obj_req->osd_req, 0, CEPH_OSD_OP_CALL, "rbd",
			    "copyup");
	osd_req_op_cls_request_data_bvecs(obj_req->osd_req, 0,
					  obj_req->copyup_bvecs,
					  obj_req->copyup_bvec_count,
@@ -3397,7 +3400,6 @@ static void cancel_tasks_sync(struct rbd_device *rbd_dev)
{
	dout("%s rbd_dev %p\n", __func__, rbd_dev);

	cancel_delayed_work_sync(&rbd_dev->watch_dwork);
	cancel_work_sync(&rbd_dev->acquired_lock_work);
	cancel_work_sync(&rbd_dev->released_lock_work);
	cancel_delayed_work_sync(&rbd_dev->lock_dwork);
@@ -3415,6 +3417,7 @@ static void rbd_unregister_watch(struct rbd_device *rbd_dev)
	rbd_dev->watch_state = RBD_WATCH_STATE_UNREGISTERED;
	mutex_unlock(&rbd_dev->watch_mutex);

	cancel_delayed_work_sync(&rbd_dev->watch_dwork);
	ceph_osdc_flush_notifies(&rbd_dev->rbd_client->client->osdc);
}

+0 −1
Original line number Diff line number Diff line
@@ -1936,7 +1936,6 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
	err = ceph_osdc_start_request(&fsc->client->osdc, rd_req, false);

	wr_req->r_mtime = ci->vfs_inode.i_mtime;
	wr_req->r_abort_on_full = true;
	err2 = ceph_osdc_start_request(&fsc->client->osdc, wr_req, false);

	if (!err)
+99 −61
Original line number Diff line number Diff line
@@ -69,6 +69,8 @@ static char *gcap_string(char *s, int c)
		*s++ = 'w';
	if (c & CEPH_CAP_GBUFFER)
		*s++ = 'b';
	if (c & CEPH_CAP_GWREXTEND)
		*s++ = 'a';
	if (c & CEPH_CAP_GLAZYIO)
		*s++ = 'l';
	return s;
@@ -3022,30 +3024,41 @@ static void invalidate_aliases(struct inode *inode)
		dput(prev);
}

struct cap_extra_info {
	struct ceph_string *pool_ns;
	/* inline data */
	u64 inline_version;
	void *inline_data;
	u32 inline_len;
	/* dirstat */
	bool dirstat_valid;
	u64 nfiles;
	u64 nsubdirs;
	/* currently issued */
	int issued;
};

/*
 * Handle a cap GRANT message from the MDS.  (Note that a GRANT may
 * actually be a revocation if it specifies a smaller cap set.)
 *
 * caller holds s_mutex and i_ceph_lock, we drop both.
 */
static void handle_cap_grant(struct ceph_mds_client *mdsc,
			     struct inode *inode, struct ceph_mds_caps *grant,
			     struct ceph_string **pns, u64 inline_version,
			     void *inline_data, u32 inline_len,
			     struct ceph_buffer *xattr_buf,
static void handle_cap_grant(struct inode *inode,
			     struct ceph_mds_session *session,
			     struct ceph_cap *cap, int issued)
			     struct ceph_cap *cap,
			     struct ceph_mds_caps *grant,
			     struct ceph_buffer *xattr_buf,
			     struct cap_extra_info *extra_info)
	__releases(ci->i_ceph_lock)
	__releases(mdsc->snap_rwsem)
	__releases(session->s_mdsc->snap_rwsem)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	int mds = session->s_mds;
	int seq = le32_to_cpu(grant->seq);
	int newcaps = le32_to_cpu(grant->caps);
	int used, wanted, dirty;
	u64 size = le64_to_cpu(grant->size);
	u64 max_size = le64_to_cpu(grant->max_size);
	struct timespec mtime, atime, ctime;
	int check_caps = 0;
	bool wake = false;
	bool writeback = false;
@@ -3055,7 +3068,7 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
	bool fill_inline = false;

	dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
	     inode, cap, mds, seq, ceph_cap_string(newcaps));
	     inode, cap, session->s_mds, seq, ceph_cap_string(newcaps));
	dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
		inode->i_size);

@@ -3101,7 +3114,7 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
	__check_cap_issue(ci, cap, newcaps);

	if ((newcaps & CEPH_CAP_AUTH_SHARED) &&
	    (issued & CEPH_CAP_AUTH_EXCL) == 0) {
	    (extra_info->issued & CEPH_CAP_AUTH_EXCL) == 0) {
		inode->i_mode = le32_to_cpu(grant->mode);
		inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid));
		inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid));
@@ -3110,15 +3123,16 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
		     from_kgid(&init_user_ns, inode->i_gid));
	}

	if ((newcaps & CEPH_CAP_AUTH_SHARED) &&
	    (issued & CEPH_CAP_LINK_EXCL) == 0) {
	if ((newcaps & CEPH_CAP_LINK_SHARED) &&
	    (extra_info->issued & CEPH_CAP_LINK_EXCL) == 0) {
		set_nlink(inode, le32_to_cpu(grant->nlink));
		if (inode->i_nlink == 0 &&
		    (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
			deleted_inode = true;
	}

	if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && grant->xattr_len) {
	if ((extra_info->issued & CEPH_CAP_XATTR_EXCL) == 0 &&
	    grant->xattr_len) {
		int len = le32_to_cpu(grant->xattr_len);
		u64 version = le64_to_cpu(grant->xattr_version);

@@ -3134,15 +3148,21 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
	}

	if (newcaps & CEPH_CAP_ANY_RD) {
		struct timespec mtime, atime, ctime;
		/* ctime/mtime/atime? */
		ceph_decode_timespec(&mtime, &grant->mtime);
		ceph_decode_timespec(&atime, &grant->atime);
		ceph_decode_timespec(&ctime, &grant->ctime);
		ceph_fill_file_time(inode, issued,
		ceph_fill_file_time(inode, extra_info->issued,
				    le32_to_cpu(grant->time_warp_seq),
				    &ctime, &mtime, &atime);
	}

	if ((newcaps & CEPH_CAP_FILE_SHARED) && extra_info->dirstat_valid) {
		ci->i_files = extra_info->nfiles;
		ci->i_subdirs = extra_info->nsubdirs;
	}

	if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) {
		/* file layout may have changed */
		s64 old_pool = ci->i_layout.pool_id;
@@ -3151,15 +3171,16 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
		ceph_file_layout_from_legacy(&ci->i_layout, &grant->layout);
		old_ns = rcu_dereference_protected(ci->i_layout.pool_ns,
					lockdep_is_held(&ci->i_ceph_lock));
		rcu_assign_pointer(ci->i_layout.pool_ns, *pns);
		rcu_assign_pointer(ci->i_layout.pool_ns, extra_info->pool_ns);

		if (ci->i_layout.pool_id != old_pool || *pns != old_ns)
		if (ci->i_layout.pool_id != old_pool ||
		    extra_info->pool_ns != old_ns)
			ci->i_ceph_flags &= ~CEPH_I_POOL_PERM;

		*pns = old_ns;
		extra_info->pool_ns = old_ns;

		/* size/truncate_seq? */
		queue_trunc = ceph_fill_file_size(inode, issued,
		queue_trunc = ceph_fill_file_size(inode, extra_info->issued,
					le32_to_cpu(grant->truncate_seq),
					le64_to_cpu(grant->truncate_size),
					size);
@@ -3238,24 +3259,26 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
	}
	BUG_ON(cap->issued & ~cap->implemented);

	if (inline_version > 0 && inline_version >= ci->i_inline_version) {
		ci->i_inline_version = inline_version;
	if (extra_info->inline_version > 0 &&
	    extra_info->inline_version >= ci->i_inline_version) {
		ci->i_inline_version = extra_info->inline_version;
		if (ci->i_inline_version != CEPH_INLINE_NONE &&
		    (newcaps & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)))
			fill_inline = true;
	}

	if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) {
		if (newcaps & ~issued)
		if (newcaps & ~extra_info->issued)
			wake = true;
		kick_flushing_inode_caps(mdsc, session, inode);
		up_read(&mdsc->snap_rwsem);
		kick_flushing_inode_caps(session->s_mdsc, session, inode);
		up_read(&session->s_mdsc->snap_rwsem);
	} else {
		spin_unlock(&ci->i_ceph_lock);
	}

	if (fill_inline)
		ceph_fill_inline_data(inode, NULL, inline_data, inline_len);
		ceph_fill_inline_data(inode, NULL, extra_info->inline_data,
				      extra_info->inline_len);

	if (queue_trunc)
		ceph_queue_vmtruncate(inode);
@@ -3720,31 +3743,25 @@ void ceph_handle_caps(struct ceph_mds_session *session,
		      struct ceph_msg *msg)
{
	struct ceph_mds_client *mdsc = session->s_mdsc;
	struct super_block *sb = mdsc->fsc->sb;
	struct inode *inode;
	struct ceph_inode_info *ci;
	struct ceph_cap *cap;
	struct ceph_mds_caps *h;
	struct ceph_mds_cap_peer *peer = NULL;
	struct ceph_snap_realm *realm = NULL;
	struct ceph_string *pool_ns = NULL;
	int mds = session->s_mds;
	int op, issued;
	int op;
	int msg_version = le16_to_cpu(msg->hdr.version);
	u32 seq, mseq;
	struct ceph_vino vino;
	u64 tid;
	u64 inline_version = 0;
	void *inline_data = NULL;
	u32  inline_len = 0;
	void *snaptrace;
	size_t snaptrace_len;
	void *p, *end;
	struct cap_extra_info extra_info = {};

	dout("handle_caps from mds%d\n", mds);
	dout("handle_caps from mds%d\n", session->s_mds);

	/* decode */
	end = msg->front.iov_base + msg->front.iov_len;
	tid = le64_to_cpu(msg->hdr.tid);
	if (msg->front.iov_len < sizeof(*h))
		goto bad;
	h = msg->front.iov_base;
@@ -3758,7 +3775,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
	snaptrace_len = le32_to_cpu(h->snap_trace_len);
	p = snaptrace + snaptrace_len;

	if (le16_to_cpu(msg->hdr.version) >= 2) {
	if (msg_version >= 2) {
		u32 flock_len;
		ceph_decode_32_safe(&p, end, flock_len, bad);
		if (p + flock_len > end)
@@ -3766,7 +3783,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
		p += flock_len;
	}

	if (le16_to_cpu(msg->hdr.version) >= 3) {
	if (msg_version >= 3) {
		if (op == CEPH_CAP_OP_IMPORT) {
			if (p + sizeof(*peer) > end)
				goto bad;
@@ -3778,16 +3795,16 @@ void ceph_handle_caps(struct ceph_mds_session *session,
		}
	}

	if (le16_to_cpu(msg->hdr.version) >= 4) {
		ceph_decode_64_safe(&p, end, inline_version, bad);
		ceph_decode_32_safe(&p, end, inline_len, bad);
		if (p + inline_len > end)
	if (msg_version >= 4) {
		ceph_decode_64_safe(&p, end, extra_info.inline_version, bad);
		ceph_decode_32_safe(&p, end, extra_info.inline_len, bad);
		if (p + extra_info.inline_len > end)
			goto bad;
		inline_data = p;
		p += inline_len;
		extra_info.inline_data = p;
		p += extra_info.inline_len;
	}

	if (le16_to_cpu(msg->hdr.version) >= 5) {
	if (msg_version >= 5) {
		struct ceph_osd_client	*osdc = &mdsc->fsc->client->osdc;
		u32			epoch_barrier;

@@ -3795,7 +3812,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
		ceph_osdc_update_epoch_barrier(osdc, epoch_barrier);
	}

	if (le16_to_cpu(msg->hdr.version) >= 8) {
	if (msg_version >= 8) {
		u64 flush_tid;
		u32 caller_uid, caller_gid;
		u32 pool_ns_len;
@@ -3809,13 +3826,33 @@ void ceph_handle_caps(struct ceph_mds_session *session,
		ceph_decode_32_safe(&p, end, pool_ns_len, bad);
		if (pool_ns_len > 0) {
			ceph_decode_need(&p, end, pool_ns_len, bad);
			pool_ns = ceph_find_or_create_string(p, pool_ns_len);
			extra_info.pool_ns =
				ceph_find_or_create_string(p, pool_ns_len);
			p += pool_ns_len;
		}
	}

	if (msg_version >= 11) {
		struct ceph_timespec *btime;
		u64 change_attr;
		u32 flags;

		/* version >= 9 */
		if (p + sizeof(*btime) > end)
			goto bad;
		btime = p;
		p += sizeof(*btime);
		ceph_decode_64_safe(&p, end, change_attr, bad);
		/* version >= 10 */
		ceph_decode_32_safe(&p, end, flags, bad);
		/* version >= 11 */
		extra_info.dirstat_valid = true;
		ceph_decode_64_safe(&p, end, extra_info.nfiles, bad);
		ceph_decode_64_safe(&p, end, extra_info.nsubdirs, bad);
	}

	/* lookup ino */
	inode = ceph_find_inode(sb, vino);
	inode = ceph_find_inode(mdsc->fsc->sb, vino);
	ci = ceph_inode(inode);
	dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino,
	     vino.snap, inode);
@@ -3848,7 +3885,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
	/* these will work even if we don't have a cap yet */
	switch (op) {
	case CEPH_CAP_OP_FLUSHSNAP_ACK:
		handle_cap_flushsnap_ack(inode, tid, h, session);
		handle_cap_flushsnap_ack(inode, le64_to_cpu(msg->hdr.tid),
					 h, session);
		goto done;

	case CEPH_CAP_OP_EXPORT:
@@ -3867,10 +3905,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
			down_read(&mdsc->snap_rwsem);
		}
		handle_cap_import(mdsc, inode, h, peer, session,
				  &cap, &issued);
		handle_cap_grant(mdsc, inode, h, &pool_ns,
				 inline_version, inline_data, inline_len,
				 msg->middle, session, cap, issued);
				  &cap, &extra_info.issued);
		handle_cap_grant(inode, session, cap,
				 h, msg->middle, &extra_info);
		if (realm)
			ceph_put_snap_realm(mdsc, realm);
		goto done_unlocked;
@@ -3878,10 +3915,11 @@ void ceph_handle_caps(struct ceph_mds_session *session,

	/* the rest require a cap */
	spin_lock(&ci->i_ceph_lock);
	cap = __get_cap_for_mds(ceph_inode(inode), mds);
	cap = __get_cap_for_mds(ceph_inode(inode), session->s_mds);
	if (!cap) {
		dout(" no cap on %p ino %llx.%llx from mds%d\n",
		     inode, ceph_ino(inode), ceph_snap(inode), mds);
		     inode, ceph_ino(inode), ceph_snap(inode),
		     session->s_mds);
		spin_unlock(&ci->i_ceph_lock);
		goto flush_cap_releases;
	}
@@ -3890,15 +3928,15 @@ void ceph_handle_caps(struct ceph_mds_session *session,
	switch (op) {
	case CEPH_CAP_OP_REVOKE:
	case CEPH_CAP_OP_GRANT:
		__ceph_caps_issued(ci, &issued);
		issued |= __ceph_caps_dirty(ci);
		handle_cap_grant(mdsc, inode, h, &pool_ns,
				 inline_version, inline_data, inline_len,
				 msg->middle, session, cap, issued);
		__ceph_caps_issued(ci, &extra_info.issued);
		extra_info.issued |= __ceph_caps_dirty(ci);
		handle_cap_grant(inode, session, cap,
				 h, msg->middle, &extra_info);
		goto done_unlocked;

	case CEPH_CAP_OP_FLUSH_ACK:
		handle_cap_flush_ack(inode, tid, h, session, cap);
		handle_cap_flush_ack(inode, le64_to_cpu(msg->hdr.tid),
				     h, session, cap);
		break;

	case CEPH_CAP_OP_TRUNC:
@@ -3925,7 +3963,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
	mutex_unlock(&session->s_mutex);
done_unlocked:
	iput(inode);
	ceph_put_string(pool_ns);
	ceph_put_string(extra_info.pool_ns);
	return;

bad:
+2 −0
Original line number Diff line number Diff line
@@ -1486,6 +1486,8 @@ const struct file_operations ceph_dir_fops = {
	.release = ceph_release,
	.unlocked_ioctl = ceph_ioctl,
	.fsync = ceph_fsync,
	.lock = ceph_lock,
	.flock = ceph_flock,
};

const struct file_operations ceph_snapdir_fops = {
Loading