Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0e294387 authored by Yan, Zheng; committed by Ilya Dryomov
Browse files

ceph: unify cap flush and snapcap flush



This patch includes the following changes:
- Assign a flush tid to each snapcap flush.
- Remove the session's s_cap_snaps_flushing list. Add the inode to the
  session's s_cap_flushing list instead. The inode is removed from the
  list when there is no pending snapcap flush or cap flush.
- Make __kick_flushing_caps() re-send both snapcap flushes and cap
  flushes.

Signed-off-by: Yan, Zheng <zyan@redhat.com>
parent e4500b5e
Loading
Loading
Loading
Loading
+156 −135
Original line number Diff line number Diff line
@@ -40,6 +40,7 @@
 * cluster to release server state.
 */

static u64 __get_oldest_flush_tid(struct ceph_mds_client *mdsc);

/*
 * Generate readable cap strings for debugging output.
@@ -1217,6 +1218,22 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
	return delayed;
}

/*
 * Send one CEPH_CAP_OP_FLUSHSNAP message to @session describing @capsnap.
 *
 * The flush tid placed in the message is capsnap->cap_flush.tid;
 * @oldest_flush_tid and @mseq are passed through unchanged.  All remaining
 * message fields are copied from @capsnap.
 *
 * Returns whatever send_cap_msg() returns.
 */
static inline int __send_flush_snap(struct inode *inode,
				    struct ceph_mds_session *session,
				    struct ceph_cap_snap *capsnap,
				    u32 mseq, u64 oldest_flush_tid)
{
	const u64 ino = ceph_vino(inode).ino;
	const u64 flush_tid = capsnap->cap_flush.tid;

	return send_cap_msg(session, ino, 0, CEPH_CAP_OP_FLUSHSNAP,
			    capsnap->issued, 0, capsnap->dirty, 0,
			    flush_tid, oldest_flush_tid, 0, mseq,
			    capsnap->size, 0,
			    &capsnap->mtime, &capsnap->atime, &capsnap->ctime,
			    capsnap->time_warp_seq,
			    capsnap->uid, capsnap->gid, capsnap->mode,
			    capsnap->xattr_version, capsnap->xattr_blob,
			    capsnap->follows, capsnap->inline_data);
}

/*
 * When a snapshot is taken, clients accumulate dirty metadata on
 * inodes with capabilities in ceph_cap_snaps to describe the file
@@ -1224,14 +1241,10 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
 * asynchronously back to the MDS once sync writes complete and dirty
 * data is written out.
 *
 * Unless @kick is true, skip cap_snaps that were already sent to
 * the MDS (i.e., during this session).
 *
 * Called under i_ceph_lock.  Takes s_mutex as needed.
 */
void __ceph_flush_snaps(struct ceph_inode_info *ci,
			struct ceph_mds_session **psession,
			int kick)
			struct ceph_mds_session **psession)
		__releases(ci->i_ceph_lock)
		__acquires(ci->i_ceph_lock)
{
@@ -1242,6 +1255,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci,
	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
	struct ceph_mds_session *session = NULL; /* if session != NULL, we hold
						    session->s_mutex */
	u64 oldest_flush_tid;
	u64 next_follows = 0;  /* keep track of how far we've gotten through the
			     i_cap_snaps list, and skip these entries next time
			     around to avoid an infinite loop */
@@ -1272,7 +1286,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci,
		}

		/* only flush each capsnap once */
		if (!kick && !list_empty(&capsnap->flushing_item)) {
		if (capsnap->cap_flush.tid > 0) {
			dout("already flushed %p, skipping\n", capsnap);
			continue;
		}
@@ -1282,8 +1296,6 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci,

		if (session && session->s_mds != mds) {
			dout("oops, wrong session %p mutex\n", session);
			if (kick)
				goto out;

			mutex_unlock(&session->s_mutex);
			ceph_put_mds_session(session);
@@ -1309,26 +1321,27 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci,
		}

		spin_lock(&mdsc->cap_dirty_lock);
		capsnap->flush_tid = ++mdsc->last_cap_flush_tid;
		capsnap->cap_flush.tid = ++mdsc->last_cap_flush_tid;
		list_add_tail(&capsnap->cap_flush.g_list,
			      &mdsc->cap_flush_list);
		oldest_flush_tid = __get_oldest_flush_tid(mdsc);

		if (list_empty(&ci->i_flushing_item)) {
			list_add_tail(&ci->i_flushing_item,
				      &session->s_cap_flushing);
		}
		spin_unlock(&mdsc->cap_dirty_lock);

		list_add_tail(&capsnap->cap_flush.i_list,
			      &ci->i_cap_flush_list);

		atomic_inc(&capsnap->nref);
		if (list_empty(&capsnap->flushing_item))
			list_add_tail(&capsnap->flushing_item,
				      &session->s_cap_snaps_flushing);
		spin_unlock(&ci->i_ceph_lock);

		dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
		     inode, capsnap, capsnap->follows, capsnap->flush_tid);
		send_cap_msg(session, ceph_vino(inode).ino, 0,
			     CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0,
			     capsnap->dirty, 0, capsnap->flush_tid, 0,
			     0, mseq, capsnap->size, 0,
			     &capsnap->mtime, &capsnap->atime,
			     &capsnap->ctime, capsnap->time_warp_seq,
			     capsnap->uid, capsnap->gid, capsnap->mode,
			     capsnap->xattr_version, capsnap->xattr_blob,
			     capsnap->follows, capsnap->inline_data);
		     inode, capsnap, capsnap->follows, capsnap->cap_flush.tid);
		__send_flush_snap(inode, session, capsnap, mseq,
				  oldest_flush_tid);

		next_follows = capsnap->follows + 1;
		ceph_put_cap_snap(capsnap);
@@ -1354,7 +1367,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci,
static void ceph_flush_snaps(struct ceph_inode_info *ci)
{
	spin_lock(&ci->i_ceph_lock);
	__ceph_flush_snaps(ci, NULL, 0);
	__ceph_flush_snaps(ci, NULL);
	spin_unlock(&ci->i_ceph_lock);
}

@@ -1476,11 +1489,6 @@ static int __mark_caps_flushing(struct inode *inode,
	if (list_empty(&ci->i_flushing_item)) {
		list_add_tail(&ci->i_flushing_item, &session->s_cap_flushing);
		mdsc->num_cap_flushing++;
		dout(" inode %p now flushing tid %llu\n", inode, cf->tid);
	} else {
		list_move_tail(&ci->i_flushing_item, &session->s_cap_flushing);
		dout(" inode %p now flushing (more) tid %llu\n",
		     inode, cf->tid);
	}
	spin_unlock(&mdsc->cap_dirty_lock);

@@ -1556,7 +1564,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,

	/* flush snaps first time around only */
	if (!list_empty(&ci->i_cap_snaps))
		__ceph_flush_snaps(ci, &session, 0);
		__ceph_flush_snaps(ci, &session);
	goto retry_locked;
retry:
	spin_lock(&ci->i_ceph_lock);
@@ -1997,80 +2005,74 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
	return err;
}

/*
 * After a recovering MDS goes active, we need to resend any caps
 * we were flushing.
 *
 * Caller holds session->s_mutex.
 */
static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
				   struct ceph_mds_session *session)
{
	struct ceph_cap_snap *capsnap;

	dout("kick_flushing_capsnaps mds%d\n", session->s_mds);
	list_for_each_entry(capsnap, &session->s_cap_snaps_flushing,
			    flushing_item) {
		struct ceph_inode_info *ci = capsnap->ci;
		struct inode *inode = &ci->vfs_inode;
		struct ceph_cap *cap;

		spin_lock(&ci->i_ceph_lock);
		cap = ci->i_auth_cap;
		if (cap && cap->session == session) {
			dout("kick_flushing_caps %p cap %p capsnap %p\n", inode,
			     cap, capsnap);
			__ceph_flush_snaps(ci, &session, 1);
		} else {
			pr_err("%p auth cap %p not mds%d ???\n", inode,
			       cap, session->s_mds);
		}
		spin_unlock(&ci->i_ceph_lock);
	}
}

static int __kick_flushing_caps(struct ceph_mds_client *mdsc,
static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
				 struct ceph_mds_session *session,
				struct ceph_inode_info *ci)
				 struct ceph_inode_info *ci,
				 u64 oldest_flush_tid)
	__releases(ci->i_ceph_lock)
	__acquires(ci->i_ceph_lock)
{
	struct inode *inode = &ci->vfs_inode;
	struct ceph_cap *cap;
	struct ceph_cap_flush *cf;
	int delayed = 0;
	int ret;
	u64 first_tid = 0;
	u64 oldest_flush_tid;

	spin_lock(&mdsc->cap_dirty_lock);
	oldest_flush_tid = __get_oldest_flush_tid(mdsc);
	spin_unlock(&mdsc->cap_dirty_lock);

	spin_lock(&ci->i_ceph_lock);
	list_for_each_entry(cf, &ci->i_cap_flush_list, i_list) {
		if (cf->tid < first_tid)
			continue;

		cap = ci->i_auth_cap;
		if (!(cap && cap->session == session)) {
			pr_err("%p auth cap %p not mds%d ???\n", inode,
					cap, session->s_mds);
			spin_unlock(&ci->i_ceph_lock);
			pr_err("%p auth cap %p not mds%d ???\n",
			       inode, cap, session->s_mds);
			break;
		}

		first_tid = cf->tid + 1;

		dout("kick_flushing_caps %p cap %p tid %llu %s\n", inode,
		     cap, cf->tid, ceph_cap_string(cf->caps));
		delayed |= __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
		if (cf->caps) {
			dout("kick_flushing_caps %p cap %p tid %llu %s\n",
			     inode, cap, cf->tid, ceph_cap_string(cf->caps));
			ci->i_ceph_flags |= CEPH_I_NODELAY;
			ret = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
					  __ceph_caps_used(ci),
					  __ceph_caps_wanted(ci),
					  cap->issued | cap->implemented,
					  cf->caps, cf->tid, oldest_flush_tid);
			if (ret) {
				pr_err("kick_flushing_caps: error sending "
					"cap flush, ino (%llx.%llx) "
					"tid %llu flushing %s\n",
					ceph_vinop(inode), cf->tid,
					ceph_cap_string(cf->caps));
			}
		} else {
			struct ceph_cap_snap *capsnap =
					container_of(cf, struct ceph_cap_snap,
						    cap_flush);
			dout("kick_flushing_caps %p capsnap %p tid %llu %s\n",
			     inode, capsnap, cf->tid,
			     ceph_cap_string(capsnap->dirty));

			atomic_inc(&capsnap->nref);
			spin_unlock(&ci->i_ceph_lock);

			ret = __send_flush_snap(inode, session, capsnap, cap->mseq,
						oldest_flush_tid);
			if (ret < 0) {
				pr_err("kick_flushing_caps: error sending "
					"cap flushsnap, ino (%llx.%llx) "
					"tid %llu follows %llu\n",
					ceph_vinop(inode), cf->tid,
					capsnap->follows);
			}

			ceph_put_cap_snap(capsnap);
		}

		spin_lock(&ci->i_ceph_lock);
	}
	spin_unlock(&ci->i_ceph_lock);
	return delayed;
}

void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc,
@@ -2078,8 +2080,14 @@ void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc,
{
	struct ceph_inode_info *ci;
	struct ceph_cap *cap;
	u64 oldest_flush_tid;

	dout("early_kick_flushing_caps mds%d\n", session->s_mds);

	spin_lock(&mdsc->cap_dirty_lock);
	oldest_flush_tid = __get_oldest_flush_tid(mdsc);
	spin_unlock(&mdsc->cap_dirty_lock);

	list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) {
		spin_lock(&ci->i_ceph_lock);
		cap = ci->i_auth_cap;
@@ -2099,10 +2107,8 @@ void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc,
		 */
		if ((cap->issued & ci->i_flushing_caps) !=
		    ci->i_flushing_caps) {
			spin_unlock(&ci->i_ceph_lock);
			if (!__kick_flushing_caps(mdsc, session, ci))
				continue;
			spin_lock(&ci->i_ceph_lock);
			__kick_flushing_caps(mdsc, session, ci,
					     oldest_flush_tid);
		}

		spin_unlock(&ci->i_ceph_lock);
@@ -2113,50 +2119,43 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
			     struct ceph_mds_session *session)
{
	struct ceph_inode_info *ci;

	kick_flushing_capsnaps(mdsc, session);
	u64 oldest_flush_tid;

	dout("kick_flushing_caps mds%d\n", session->s_mds);

	spin_lock(&mdsc->cap_dirty_lock);
	oldest_flush_tid = __get_oldest_flush_tid(mdsc);
	spin_unlock(&mdsc->cap_dirty_lock);

	list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) {
		int delayed = __kick_flushing_caps(mdsc, session, ci);
		if (delayed) {
		spin_lock(&ci->i_ceph_lock);
			__cap_delay_requeue(mdsc, ci);
		__kick_flushing_caps(mdsc, session, ci, oldest_flush_tid);
		spin_unlock(&ci->i_ceph_lock);
	}
}
}

static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
				     struct ceph_mds_session *session,
				     struct inode *inode)
	__releases(ci->i_ceph_lock)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_cap *cap;

	spin_lock(&ci->i_ceph_lock);
	cap = ci->i_auth_cap;
	dout("kick_flushing_inode_caps %p flushing %s\n", inode,
	     ceph_cap_string(ci->i_flushing_caps));

	__ceph_flush_snaps(ci, &session, 1);

	if (ci->i_flushing_caps) {
		int delayed;

	if (!list_empty(&ci->i_cap_flush_list)) {
		u64 oldest_flush_tid;
		spin_lock(&mdsc->cap_dirty_lock);
		list_move_tail(&ci->i_flushing_item,
			       &cap->session->s_cap_flushing);
		oldest_flush_tid = __get_oldest_flush_tid(mdsc);
		spin_unlock(&mdsc->cap_dirty_lock);

		__kick_flushing_caps(mdsc, session, ci, oldest_flush_tid);
		spin_unlock(&ci->i_ceph_lock);

		delayed = __kick_flushing_caps(mdsc, session, ci);
		if (delayed) {
			spin_lock(&ci->i_ceph_lock);
			__cap_delay_requeue(mdsc, ci);
			spin_unlock(&ci->i_ceph_lock);
		}
	} else {
		spin_unlock(&ci->i_ceph_lock);
	}
@@ -2487,12 +2486,11 @@ static int ceph_try_drop_cap_snap(struct ceph_cap_snap *capsnap)
{
	if (!capsnap->need_flush &&
	    !capsnap->writing && !capsnap->dirty_pages) {

		dout("dropping cap_snap %p follows %llu\n",
		     capsnap, capsnap->follows);
		BUG_ON(capsnap->cap_flush.tid > 0);
		ceph_put_snap_context(capsnap->context);
		list_del(&capsnap->ci_item);
		list_del(&capsnap->flushing_item);
		ceph_put_cap_snap(capsnap);
		return 1;
	}
@@ -2891,13 +2889,13 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
			fill_inline = true;
	}

	spin_unlock(&ci->i_ceph_lock);

	if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) {
		kick_flushing_inode_caps(mdsc, session, inode);
		up_read(&mdsc->snap_rwsem);
		if (newcaps & ~issued)
			wake = true;
		kick_flushing_inode_caps(mdsc, session, inode);
		up_read(&mdsc->snap_rwsem);
	} else {
		spin_unlock(&ci->i_ceph_lock);
	}

	if (fill_inline)
@@ -2951,6 +2949,8 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
	list_for_each_entry_safe(cf, tmp_cf, &ci->i_cap_flush_list, i_list) {
		if (cf->tid == flush_tid)
			cleaned = cf->caps;
		if (cf->caps == 0) /* capsnap */
			continue;
		if (cf->tid <= flush_tid) {
			list_del(&cf->i_list);
			list_add_tail(&cf->i_list, &to_remove);
@@ -2985,13 +2985,16 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
	}

	if (ci->i_flushing_caps == 0) {
		if (list_empty(&ci->i_cap_flush_list)) {
			list_del_init(&ci->i_flushing_item);
		if (!list_empty(&session->s_cap_flushing))
			if (!list_empty(&session->s_cap_flushing)) {
				dout(" mds%d still flushing cap on %p\n",
				     session->s_mds,
			     &list_entry(session->s_cap_flushing.next,
				     &list_first_entry(&session->s_cap_flushing,
						struct ceph_inode_info,
						i_flushing_item)->vfs_inode);
			}
		}
		mdsc->num_cap_flushing--;
		dout(" inode %p now !flushing\n", inode);

@@ -3039,7 +3042,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
	u64 follows = le64_to_cpu(m->snap_follows);
	struct ceph_cap_snap *capsnap;
	int drop = 0;
	int flushed = 0;

	dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n",
	     inode, ci, session->s_mds, follows);
@@ -3047,31 +3050,48 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
	spin_lock(&ci->i_ceph_lock);
	list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
		if (capsnap->follows == follows) {
			if (capsnap->flush_tid != flush_tid) {
			if (capsnap->cap_flush.tid != flush_tid) {
				dout(" cap_snap %p follows %lld tid %lld !="
				     " %lld\n", capsnap, follows,
				     flush_tid, capsnap->flush_tid);
				     flush_tid, capsnap->cap_flush.tid);
				break;
			}
			WARN_ON(capsnap->dirty_pages || capsnap->writing);
			dout(" removing %p cap_snap %p follows %lld\n",
			     inode, capsnap, follows);
			ceph_put_snap_context(capsnap->context);
			list_del(&capsnap->ci_item);
			list_del(&capsnap->flushing_item);
			ceph_put_cap_snap(capsnap);
			wake_up_all(&mdsc->cap_flushing_wq);
			drop = 1;
			flushed = 1;
			break;
		} else {
			dout(" skipping cap_snap %p follows %lld\n",
			     capsnap, capsnap->follows);
		}
	}
	if (flushed) {
		u64 oldest_flush_tid;
		WARN_ON(capsnap->dirty_pages || capsnap->writing);
		dout(" removing %p cap_snap %p follows %lld\n",
		     inode, capsnap, follows);
		list_del(&capsnap->ci_item);
		list_del(&capsnap->cap_flush.i_list);

		spin_lock(&mdsc->cap_dirty_lock);

		if (list_empty(&ci->i_cap_flush_list))
			list_del_init(&ci->i_flushing_item);

		list_del(&capsnap->cap_flush.g_list);

		oldest_flush_tid = __get_oldest_flush_tid(mdsc);
		if (oldest_flush_tid == 0 || oldest_flush_tid > flush_tid)
			wake_up_all(&mdsc->cap_flushing_wq);

		spin_unlock(&mdsc->cap_dirty_lock);
		wake_up_all(&ci->i_cap_wq);
	}
	spin_unlock(&ci->i_ceph_lock);
	if (drop)
	if (flushed) {
		ceph_put_snap_context(capsnap->context);
		ceph_put_cap_snap(capsnap);
		iput(inode);
	}
}

/*
 * Handle TRUNC from MDS, indicating file truncation.
@@ -3175,7 +3195,8 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
			tcap->implemented |= issued;
			if (cap == ci->i_auth_cap)
				ci->i_auth_cap = tcap;
			if (ci->i_flushing_caps && ci->i_auth_cap == tcap) {
			if (!list_empty(&ci->i_cap_flush_list) &&
			    ci->i_auth_cap == tcap) {
				spin_lock(&mdsc->cap_dirty_lock);
				list_move_tail(&ci->i_flushing_item,
					       &tcap->session->s_cap_flushing);
+6 −71
Original line number Diff line number Diff line
@@ -472,7 +472,6 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
	s->s_cap_iterator = NULL;
	INIT_LIST_HEAD(&s->s_cap_releases);
	INIT_LIST_HEAD(&s->s_cap_flushing);
	INIT_LIST_HEAD(&s->s_cap_snaps_flushing);

	dout("register_session mds%d\n", mds);
	if (mds >= mdsc->max_sessions) {
@@ -1479,21 +1478,6 @@ static int trim_caps(struct ceph_mds_client *mdsc,
	return 0;
}

static int check_capsnap_flush(struct ceph_inode_info *ci,
			       u64 want_snap_seq)
{
	int ret = 1;
	spin_lock(&ci->i_ceph_lock);
	if (want_snap_seq > 0 && !list_empty(&ci->i_cap_snaps)) {
		struct ceph_cap_snap *capsnap =
			list_first_entry(&ci->i_cap_snaps,
					 struct ceph_cap_snap, ci_item);
		ret = capsnap->follows >= want_snap_seq;
	}
	spin_unlock(&ci->i_ceph_lock);
	return ret;
}

static int check_caps_flush(struct ceph_mds_client *mdsc,
			    u64 want_flush_tid)
{
@@ -1520,54 +1504,9 @@ static int check_caps_flush(struct ceph_mds_client *mdsc,
 * returns true if we've flushed through want_flush_tid
 */
static void wait_caps_flush(struct ceph_mds_client *mdsc,
			    u64 want_flush_tid, u64 want_snap_seq)
			    u64 want_flush_tid)
{
	int mds;

	dout("check_caps_flush want %llu snap want %llu\n",
	     want_flush_tid, want_snap_seq);
	mutex_lock(&mdsc->mutex);
	for (mds = 0; mds < mdsc->max_sessions; ) {
		struct ceph_mds_session *session = mdsc->sessions[mds];
		struct inode *inode = NULL;

		if (!session) {
			mds++;
			continue;
		}
		get_session(session);
		mutex_unlock(&mdsc->mutex);

		mutex_lock(&session->s_mutex);
		if (!list_empty(&session->s_cap_snaps_flushing)) {
			struct ceph_cap_snap *capsnap =
				list_first_entry(&session->s_cap_snaps_flushing,
						 struct ceph_cap_snap,
						 flushing_item);
			struct ceph_inode_info *ci = capsnap->ci;
			if (!check_capsnap_flush(ci, want_snap_seq)) {
				dout("check_cap_flush still flushing snap %p "
				     "follows %lld <= %lld to mds%d\n",
				     &ci->vfs_inode, capsnap->follows,
				     want_snap_seq, mds);
				inode = igrab(&ci->vfs_inode);
			}
		}
		mutex_unlock(&session->s_mutex);
		ceph_put_mds_session(session);

		if (inode) {
			wait_event(mdsc->cap_flushing_wq,
				   check_capsnap_flush(ceph_inode(inode),
						       want_snap_seq));
			iput(inode);
		} else {
			mds++;
		}

		mutex_lock(&mdsc->mutex);
	}
	mutex_unlock(&mdsc->mutex);
	dout("check_caps_flush want %llu\n", want_flush_tid);

	wait_event(mdsc->cap_flushing_wq,
		   check_caps_flush(mdsc, want_flush_tid));
@@ -3584,7 +3523,7 @@ static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid)

void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
{
	u64 want_tid, want_flush, want_snap;
	u64 want_tid, want_flush;

	if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
		return;
@@ -3599,15 +3538,11 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
	want_flush = mdsc->last_cap_flush_tid;
	spin_unlock(&mdsc->cap_dirty_lock);

	down_read(&mdsc->snap_rwsem);
	want_snap = mdsc->last_snap_seq;
	up_read(&mdsc->snap_rwsem);

	dout("sync want tid %lld flush_seq %lld snap_seq %lld\n",
	     want_tid, want_flush, want_snap);
	dout("sync want tid %lld flush_seq %lld\n",
	     want_tid, want_flush);

	wait_unsafe_requests(mdsc, want_tid);
	wait_caps_flush(mdsc, want_flush, want_snap);
	wait_caps_flush(mdsc, want_flush);
}

/*
+0 −1
Original line number Diff line number Diff line
@@ -152,7 +152,6 @@ struct ceph_mds_session {

	/* protected by mutex */
	struct list_head  s_cap_flushing;     /* inodes w/ flushing caps */
	struct list_head  s_cap_snaps_flushing;
	unsigned long     s_renew_requested; /* last time we sent a renew req */
	u64               s_renew_seq;

+1 −3
Original line number Diff line number Diff line
@@ -520,9 +520,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
	ihold(inode);

	atomic_set(&capsnap->nref, 1);
	capsnap->ci = ci;
	INIT_LIST_HEAD(&capsnap->ci_item);
	INIT_LIST_HEAD(&capsnap->flushing_item);

	capsnap->follows = old_snapc->seq;
	capsnap->issued = __ceph_caps_issued(ci, NULL);
@@ -800,7 +798,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
		ihold(inode);
		spin_unlock(&mdsc->snap_flush_lock);
		spin_lock(&ci->i_ceph_lock);
		__ceph_flush_snaps(ci, &session, 0);
		__ceph_flush_snaps(ci, &session);
		spin_unlock(&ci->i_ceph_lock);
		iput(inode);
		spin_lock(&mdsc->snap_flush_lock);
+12 −12
Original line number Diff line number Diff line
@@ -147,6 +147,13 @@ struct ceph_cap {
#define CHECK_CAPS_AUTHONLY   2  /* only check auth cap */
#define CHECK_CAPS_FLUSH      4  /* flush any dirty caps */

/*
 * Tracking record for one pending flush, identified by @tid.
 *
 * Used both for ordinary cap flushes (@caps holds the cap bits being
 * flushed) and for snapcap flushes, where it is embedded in
 * struct ceph_cap_snap as ->cap_flush and recognised by @caps == 0.
 */
struct ceph_cap_flush {
	u64 tid;                 /* flush tid; 0 until one has been assigned */
	int caps; /* 0 means capsnap */
	struct list_head g_list; // global: entry on mdsc->cap_flush_list
	struct list_head i_list; // per inode: entry on ci->i_cap_flush_list
};

/*
 * Snapped cap state that is pending flush to mds.  When a snapshot occurs,
 * we first complete any in-process sync writes and writeback any dirty
@@ -154,10 +161,11 @@ struct ceph_cap {
 */
struct ceph_cap_snap {
	atomic_t nref;
	struct ceph_inode_info *ci;
	struct list_head ci_item, flushing_item;
	struct list_head ci_item;

	u64 follows, flush_tid;
	struct ceph_cap_flush cap_flush;

	u64 follows;
	int issued, dirty;
	struct ceph_snap_context *context;

@@ -186,13 +194,6 @@ static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
	}
}

struct ceph_cap_flush {
	u64 tid;
	int caps;
	struct list_head g_list; // global
	struct list_head i_list; // per inode
};

/*
 * The frag tree describes how a directory is fragmented, potentially across
 * multiple metadata servers.  It is also used to indicate points where
@@ -888,8 +889,7 @@ extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
				       struct ceph_snap_context *snapc);
extern void __ceph_flush_snaps(struct ceph_inode_info *ci,
			       struct ceph_mds_session **psession,
			       int again);
			       struct ceph_mds_session **psession);
extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
			    struct ceph_mds_session *session);
extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);