Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit be655596 authored by Sage Weil's avatar Sage Weil
Browse files

ceph: use i_ceph_lock instead of i_lock



We have been using i_lock to protect all kinds of data structures in the
ceph_inode_info struct, including lists of inodes that we need to iterate
over while avoiding races with inode destruction.  That requires grabbing
a reference to the inode with the list lock protected, but igrab() now
takes i_lock to check the inode flags.

Changing the list lock ordering would be a painful process.

However, using a ceph-specific i_ceph_lock in the ceph inode instead of
i_lock is a simple mechanical change and avoids the ordering constraints
imposed by igrab().

Reported-by: default avatarAmon Ott <a.ott@m-privacy.de>
Signed-off-by: default avatarSage Weil <sage@newdream.net>
parent 51703306
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -87,7 +87,7 @@ static int ceph_set_page_dirty(struct page *page)
	snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context);

	/* dirty the head */
	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);
	if (ci->i_head_snapc == NULL)
		ci->i_head_snapc = ceph_get_snap_context(snapc);
	++ci->i_wrbuffer_ref_head;
@@ -100,7 +100,7 @@ static int ceph_set_page_dirty(struct page *page)
	     ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1,
	     ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head,
	     snapc, snapc->seq, snapc->num_snaps);
	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);

	/* now adjust page */
	spin_lock_irq(&mapping->tree_lock);
@@ -391,7 +391,7 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
	struct ceph_snap_context *snapc = NULL;
	struct ceph_cap_snap *capsnap = NULL;

	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);
	list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
		dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
		     capsnap->context, capsnap->dirty_pages);
@@ -407,7 +407,7 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
		dout(" head snapc %p has %d dirty pages\n",
		     snapc, ci->i_wrbuffer_ref_head);
	}
	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);
	return snapc;
}

+94 −93
Original line number Diff line number Diff line
@@ -309,7 +309,7 @@ void ceph_reservation_status(struct ceph_fs_client *fsc,
/*
 * Find ceph_cap for given mds, if any.
 *
 * Called with i_lock held.
 * Called with i_ceph_lock held.
 */
static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds)
{
@@ -332,9 +332,9 @@ struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds)
{
	struct ceph_cap *cap;

	spin_lock(&ci->vfs_inode.i_lock);
	spin_lock(&ci->i_ceph_lock);
	cap = __get_cap_for_mds(ci, mds);
	spin_unlock(&ci->vfs_inode.i_lock);
	spin_unlock(&ci->i_ceph_lock);
	return cap;
}

@@ -361,15 +361,16 @@ static int __ceph_get_cap_mds(struct ceph_inode_info *ci)

int ceph_get_cap_mds(struct inode *inode)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	int mds;
	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);
	mds = __ceph_get_cap_mds(ceph_inode(inode));
	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);
	return mds;
}

/*
 * Called under i_lock.
 * Called under i_ceph_lock.
 */
static void __insert_cap_node(struct ceph_inode_info *ci,
			      struct ceph_cap *new)
@@ -415,7 +416,7 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
 *
 * If I_FLUSH is set, leave the inode at the front of the list.
 *
 * Caller holds i_lock
 * Caller holds i_ceph_lock
 *    -> we take mdsc->cap_delay_lock
 */
static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
@@ -457,7 +458,7 @@ static void __cap_delay_requeue_front(struct ceph_mds_client *mdsc,
/*
 * Cancel delayed work on cap.
 *
 * Caller must hold i_lock.
 * Caller must hold i_ceph_lock.
 */
static void __cap_delay_cancel(struct ceph_mds_client *mdsc,
			       struct ceph_inode_info *ci)
@@ -532,14 +533,14 @@ int ceph_add_cap(struct inode *inode,
		wanted |= ceph_caps_for_mode(fmode);

retry:
	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);
	cap = __get_cap_for_mds(ci, mds);
	if (!cap) {
		if (new_cap) {
			cap = new_cap;
			new_cap = NULL;
		} else {
			spin_unlock(&inode->i_lock);
			spin_unlock(&ci->i_ceph_lock);
			new_cap = get_cap(mdsc, caps_reservation);
			if (new_cap == NULL)
				return -ENOMEM;
@@ -625,7 +626,7 @@ int ceph_add_cap(struct inode *inode,

	if (fmode >= 0)
		__ceph_get_fmode(ci, fmode);
	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);
	wake_up_all(&ci->i_cap_wq);
	return 0;
}
@@ -792,7 +793,7 @@ int ceph_caps_revoking(struct ceph_inode_info *ci, int mask)
	struct rb_node *p;
	int ret = 0;

	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);
	for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
		cap = rb_entry(p, struct ceph_cap, ci_node);
		if (__cap_is_valid(cap) &&
@@ -801,7 +802,7 @@ int ceph_caps_revoking(struct ceph_inode_info *ci, int mask)
			break;
		}
	}
	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);
	dout("ceph_caps_revoking %p %s = %d\n", inode,
	     ceph_cap_string(mask), ret);
	return ret;
@@ -855,7 +856,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
}

/*
 * called under i_lock
 * called under i_ceph_lock
 */
static int __ceph_is_any_caps(struct ceph_inode_info *ci)
{
@@ -865,7 +866,7 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
/*
 * Remove a cap.  Take steps to deal with a racing iterate_session_caps.
 *
 * caller should hold i_lock.
 * caller should hold i_ceph_lock.
 * caller will not hold session s_mutex if called from destroy_inode.
 */
void __ceph_remove_cap(struct ceph_cap *cap)
@@ -1028,7 +1029,7 @@ static void __queue_cap_release(struct ceph_mds_session *session,

/*
 * Queue cap releases when an inode is dropped from our cache.  Since
 * inode is about to be destroyed, there is no need for i_lock.
 * inode is about to be destroyed, there is no need for i_ceph_lock.
 */
void ceph_queue_caps_release(struct inode *inode)
{
@@ -1049,7 +1050,7 @@ void ceph_queue_caps_release(struct inode *inode)

/*
 * Send a cap msg on the given inode.  Update our caps state, then
 * drop i_lock and send the message.
 * drop i_ceph_lock and send the message.
 *
 * Make note of max_size reported/requested from mds, revoked caps
 * that have now been implemented.
@@ -1061,13 +1062,13 @@ void ceph_queue_caps_release(struct inode *inode)
 * Return non-zero if delayed release, or we experienced an error
 * such that the caller should requeue + retry later.
 *
 * called with i_lock, then drops it.
 * called with i_ceph_lock, then drops it.
 * caller should hold snap_rwsem (read), s_mutex.
 */
static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
		      int op, int used, int want, int retain, int flushing,
		      unsigned *pflush_tid)
	__releases(cap->ci->vfs_inode->i_lock)
	__releases(cap->ci->i_ceph_lock)
{
	struct ceph_inode_info *ci = cap->ci;
	struct inode *inode = &ci->vfs_inode;
@@ -1170,7 +1171,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
		xattr_version = ci->i_xattrs.version;
	}

	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);

	ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
		op, keep, want, flushing, seq, flush_tid, issue_seq, mseq,
@@ -1198,13 +1199,13 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
 * Unless @again is true, skip cap_snaps that were already sent to
 * the MDS (i.e., during this session).
 *
 * Called under i_lock.  Takes s_mutex as needed.
 * Called under i_ceph_lock.  Takes s_mutex as needed.
 */
void __ceph_flush_snaps(struct ceph_inode_info *ci,
			struct ceph_mds_session **psession,
			int again)
		__releases(ci->vfs_inode->i_lock)
		__acquires(ci->vfs_inode->i_lock)
		__releases(ci->i_ceph_lock)
		__acquires(ci->i_ceph_lock)
{
	struct inode *inode = &ci->vfs_inode;
	int mds;
@@ -1261,7 +1262,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci,
			session = NULL;
		}
		if (!session) {
			spin_unlock(&inode->i_lock);
			spin_unlock(&ci->i_ceph_lock);
			mutex_lock(&mdsc->mutex);
			session = __ceph_lookup_mds_session(mdsc, mds);
			mutex_unlock(&mdsc->mutex);
@@ -1275,7 +1276,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci,
			 * deletion or migration.  retry, and we'll
			 * get a better @mds value next time.
			 */
			spin_lock(&inode->i_lock);
			spin_lock(&ci->i_ceph_lock);
			goto retry;
		}

@@ -1285,7 +1286,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci,
			list_del_init(&capsnap->flushing_item);
		list_add_tail(&capsnap->flushing_item,
			      &session->s_cap_snaps_flushing);
		spin_unlock(&inode->i_lock);
		spin_unlock(&ci->i_ceph_lock);

		dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
		     inode, capsnap, capsnap->follows, capsnap->flush_tid);
@@ -1302,7 +1303,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci,
		next_follows = capsnap->follows + 1;
		ceph_put_cap_snap(capsnap);

		spin_lock(&inode->i_lock);
		spin_lock(&ci->i_ceph_lock);
		goto retry;
	}

@@ -1322,11 +1323,9 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci,

static void ceph_flush_snaps(struct ceph_inode_info *ci)
{
	struct inode *inode = &ci->vfs_inode;

	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);
	__ceph_flush_snaps(ci, NULL, 0);
	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);
}

/*
@@ -1373,7 +1372,7 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
 * Add dirty inode to the flushing list.  Assigned a seq number so we
 * can wait for caps to flush without starving.
 *
 * Called under i_lock.
 * Called under i_ceph_lock.
 */
static int __mark_caps_flushing(struct inode *inode,
				 struct ceph_mds_session *session)
@@ -1421,9 +1420,9 @@ static int try_nonblocking_invalidate(struct inode *inode)
	struct ceph_inode_info *ci = ceph_inode(inode);
	u32 invalidating_gen = ci->i_rdcache_gen;

	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);
	invalidate_mapping_pages(&inode->i_data, 0, -1);
	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);

	if (inode->i_data.nrpages == 0 &&
	    invalidating_gen == ci->i_rdcache_gen) {
@@ -1470,7 +1469,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
	if (mdsc->stopping)
		is_delayed = 1;

	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);

	if (ci->i_ceph_flags & CEPH_I_FLUSH)
		flags |= CHECK_CAPS_FLUSH;
@@ -1480,7 +1479,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
		__ceph_flush_snaps(ci, &session, 0);
	goto retry_locked;
retry:
	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);
retry_locked:
	file_wanted = __ceph_caps_file_wanted(ci);
	used = __ceph_caps_used(ci);
@@ -1634,7 +1633,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
			if (mutex_trylock(&session->s_mutex) == 0) {
				dout("inverting session/ino locks on %p\n",
				     session);
				spin_unlock(&inode->i_lock);
				spin_unlock(&ci->i_ceph_lock);
				if (took_snap_rwsem) {
					up_read(&mdsc->snap_rwsem);
					took_snap_rwsem = 0;
@@ -1648,7 +1647,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
			if (down_read_trylock(&mdsc->snap_rwsem) == 0) {
				dout("inverting snap/in locks on %p\n",
				     inode);
				spin_unlock(&inode->i_lock);
				spin_unlock(&ci->i_ceph_lock);
				down_read(&mdsc->snap_rwsem);
				took_snap_rwsem = 1;
				goto retry;
@@ -1664,10 +1663,10 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
		mds = cap->mds;  /* remember mds, so we don't repeat */
		sent++;

		/* __send_cap drops i_lock */
		/* __send_cap drops i_ceph_lock */
		delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, want,
				      retain, flushing, NULL);
		goto retry; /* retake i_lock and restart our cap scan. */
		goto retry; /* retake i_ceph_lock and restart our cap scan. */
	}

	/*
@@ -1681,7 +1680,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
	else if (!is_delayed || force_requeue)
		__cap_delay_requeue(mdsc, ci);

	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);

	if (queue_invalidate)
		ceph_queue_invalidate(inode);
@@ -1704,7 +1703,7 @@ static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session,
	int flushing = 0;

retry:
	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);
	if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
		dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode);
		goto out;
@@ -1716,7 +1715,7 @@ static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session,
		int delayed;

		if (!session) {
			spin_unlock(&inode->i_lock);
			spin_unlock(&ci->i_ceph_lock);
			session = cap->session;
			mutex_lock(&session->s_mutex);
			goto retry;
@@ -1727,18 +1726,18 @@ static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session,

		flushing = __mark_caps_flushing(inode, session);

		/* __send_cap drops i_lock */
		/* __send_cap drops i_ceph_lock */
		delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want,
				     cap->issued | cap->implemented, flushing,
				     flush_tid);
		if (!delayed)
			goto out_unlocked;

		spin_lock(&inode->i_lock);
		spin_lock(&ci->i_ceph_lock);
		__cap_delay_requeue(mdsc, ci);
	}
out:
	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);
out_unlocked:
	if (session && unlock_session)
		mutex_unlock(&session->s_mutex);
@@ -1753,7 +1752,7 @@ static int caps_are_flushed(struct inode *inode, unsigned tid)
	struct ceph_inode_info *ci = ceph_inode(inode);
	int i, ret = 1;

	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);
	for (i = 0; i < CEPH_CAP_BITS; i++)
		if ((ci->i_flushing_caps & (1 << i)) &&
		    ci->i_cap_flush_tid[i] <= tid) {
@@ -1761,7 +1760,7 @@ static int caps_are_flushed(struct inode *inode, unsigned tid)
			ret = 0;
			break;
		}
	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);
	return ret;
}

@@ -1868,10 +1867,10 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
		struct ceph_mds_client *mdsc =
			ceph_sb_to_client(inode->i_sb)->mdsc;

		spin_lock(&inode->i_lock);
		spin_lock(&ci->i_ceph_lock);
		if (__ceph_caps_dirty(ci))
			__cap_delay_requeue_front(mdsc, ci);
		spin_unlock(&inode->i_lock);
		spin_unlock(&ci->i_ceph_lock);
	}
	return err;
}
@@ -1894,7 +1893,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
		struct inode *inode = &ci->vfs_inode;
		struct ceph_cap *cap;

		spin_lock(&inode->i_lock);
		spin_lock(&ci->i_ceph_lock);
		cap = ci->i_auth_cap;
		if (cap && cap->session == session) {
			dout("kick_flushing_caps %p cap %p capsnap %p\n", inode,
@@ -1904,7 +1903,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
			pr_err("%p auth cap %p not mds%d ???\n", inode,
			       cap, session->s_mds);
		}
		spin_unlock(&inode->i_lock);
		spin_unlock(&ci->i_ceph_lock);
	}
}

@@ -1921,7 +1920,7 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
		struct ceph_cap *cap;
		int delayed = 0;

		spin_lock(&inode->i_lock);
		spin_lock(&ci->i_ceph_lock);
		cap = ci->i_auth_cap;
		if (cap && cap->session == session) {
			dout("kick_flushing_caps %p cap %p %s\n", inode,
@@ -1932,14 +1931,14 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
					     cap->issued | cap->implemented,
					     ci->i_flushing_caps, NULL);
			if (delayed) {
				spin_lock(&inode->i_lock);
				spin_lock(&ci->i_ceph_lock);
				__cap_delay_requeue(mdsc, ci);
				spin_unlock(&inode->i_lock);
				spin_unlock(&ci->i_ceph_lock);
			}
		} else {
			pr_err("%p auth cap %p not mds%d ???\n", inode,
			       cap, session->s_mds);
			spin_unlock(&inode->i_lock);
			spin_unlock(&ci->i_ceph_lock);
		}
	}
}
@@ -1952,7 +1951,7 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
	struct ceph_cap *cap;
	int delayed = 0;

	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);
	cap = ci->i_auth_cap;
	dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode,
	     ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq);
@@ -1964,12 +1963,12 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
				     cap->issued | cap->implemented,
				     ci->i_flushing_caps, NULL);
		if (delayed) {
			spin_lock(&inode->i_lock);
			spin_lock(&ci->i_ceph_lock);
			__cap_delay_requeue(mdsc, ci);
			spin_unlock(&inode->i_lock);
			spin_unlock(&ci->i_ceph_lock);
		}
	} else {
		spin_unlock(&inode->i_lock);
		spin_unlock(&ci->i_ceph_lock);
	}
}

@@ -1978,7 +1977,7 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
 * Take references to capabilities we hold, so that we don't release
 * them to the MDS prematurely.
 *
 * Protected by i_lock.
 * Protected by i_ceph_lock.
 */
static void __take_cap_refs(struct ceph_inode_info *ci, int got)
{
@@ -2016,7 +2015,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,

	dout("get_cap_refs %p need %s want %s\n", inode,
	     ceph_cap_string(need), ceph_cap_string(want));
	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);

	/* make sure file is actually open */
	file_wanted = __ceph_caps_file_wanted(ci);
@@ -2077,7 +2076,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
		     ceph_cap_string(have), ceph_cap_string(need));
	}
out:
	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);
	dout("get_cap_refs %p ret %d got %s\n", inode,
	     ret, ceph_cap_string(*got));
	return ret;
@@ -2094,7 +2093,7 @@ static void check_max_size(struct inode *inode, loff_t endoff)
	int check = 0;

	/* do we need to explicitly request a larger max_size? */
	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);
	if ((endoff >= ci->i_max_size ||
	     endoff > (inode->i_size << 1)) &&
	    endoff > ci->i_wanted_max_size) {
@@ -2103,7 +2102,7 @@ static void check_max_size(struct inode *inode, loff_t endoff)
		ci->i_wanted_max_size = endoff;
		check = 1;
	}
	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);
	if (check)
		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
}
@@ -2140,9 +2139,9 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, int *got,
 */
void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps)
{
	spin_lock(&ci->vfs_inode.i_lock);
	spin_lock(&ci->i_ceph_lock);
	__take_cap_refs(ci, caps);
	spin_unlock(&ci->vfs_inode.i_lock);
	spin_unlock(&ci->i_ceph_lock);
}

/*
@@ -2160,7 +2159,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
	int last = 0, put = 0, flushsnaps = 0, wake = 0;
	struct ceph_cap_snap *capsnap;

	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);
	if (had & CEPH_CAP_PIN)
		--ci->i_pin_ref;
	if (had & CEPH_CAP_FILE_RD)
@@ -2193,7 +2192,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
				}
			}
		}
	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);

	dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
	     last ? " last" : "", put ? " put" : "");
@@ -2225,7 +2224,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
	int found = 0;
	struct ceph_cap_snap *capsnap = NULL;

	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);
	ci->i_wrbuffer_ref -= nr;
	last = !ci->i_wrbuffer_ref;

@@ -2274,7 +2273,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
		}
	}

	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);

	if (last) {
		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
@@ -2291,7 +2290,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 * Handle a cap GRANT message from the MDS.  (Note that a GRANT may
 * actually be a revocation if it specifies a smaller cap set.)
 *
 * caller holds s_mutex and i_lock, we drop both.
 * caller holds s_mutex and i_ceph_lock, we drop both.
 *
 * return value:
 *  0 - ok
@@ -2302,7 +2301,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
			     struct ceph_mds_session *session,
			     struct ceph_cap *cap,
			     struct ceph_buffer *xattr_buf)
		__releases(inode->i_lock)
		__releases(ci->i_ceph_lock)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	int mds = session->s_mds;
@@ -2453,7 +2452,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
	}
	BUG_ON(cap->issued & ~cap->implemented);

	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);
	if (writeback)
		/*
		 * queue inode for writeback: we can't actually call
@@ -2483,7 +2482,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
				 struct ceph_mds_caps *m,
				 struct ceph_mds_session *session,
				 struct ceph_cap *cap)
	__releases(inode->i_lock)
	__releases(ci->i_ceph_lock)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
@@ -2539,7 +2538,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
	wake_up_all(&ci->i_cap_wq);

out:
	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);
	if (drop)
		iput(inode);
}
@@ -2562,7 +2561,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
	dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n",
	     inode, ci, session->s_mds, follows);

	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);
	list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
		if (capsnap->follows == follows) {
			if (capsnap->flush_tid != flush_tid) {
@@ -2585,7 +2584,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
			     capsnap, capsnap->follows);
		}
	}
	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);
	if (drop)
		iput(inode);
}
@@ -2598,7 +2597,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
static void handle_cap_trunc(struct inode *inode,
			     struct ceph_mds_caps *trunc,
			     struct ceph_mds_session *session)
	__releases(inode->i_lock)
	__releases(ci->i_ceph_lock)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	int mds = session->s_mds;
@@ -2617,7 +2616,7 @@ static void handle_cap_trunc(struct inode *inode,
	     inode, mds, seq, truncate_size, truncate_seq);
	queue_trunc = ceph_fill_file_size(inode, issued,
					  truncate_seq, truncate_size, size);
	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);

	if (queue_trunc)
		ceph_queue_vmtruncate(inode);
@@ -2646,7 +2645,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
	dout("handle_cap_export inode %p ci %p mds%d mseq %d\n",
	     inode, ci, mds, mseq);

	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);

	/* make sure we haven't seen a higher mseq */
	for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
@@ -2690,7 +2689,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
	}
	/* else, we already released it */

	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);
}

/*
@@ -2745,9 +2744,9 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
	up_read(&mdsc->snap_rwsem);

	/* make sure we re-request max_size, if necessary */
	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);
	ci->i_requested_max_size = 0;
	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);
}

/*
@@ -2762,6 +2761,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
	struct ceph_mds_client *mdsc = session->s_mdsc;
	struct super_block *sb = mdsc->fsc->sb;
	struct inode *inode;
	struct ceph_inode_info *ci;
	struct ceph_cap *cap;
	struct ceph_mds_caps *h;
	int mds = session->s_mds;
@@ -2815,6 +2815,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,

	/* lookup ino */
	inode = ceph_find_inode(sb, vino);
	ci = ceph_inode(inode);
	dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino,
	     vino.snap, inode);
	if (!inode) {
@@ -2844,16 +2845,16 @@ void ceph_handle_caps(struct ceph_mds_session *session,
	}

	/* the rest require a cap */
	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);
	cap = __get_cap_for_mds(ceph_inode(inode), mds);
	if (!cap) {
		dout(" no cap on %p ino %llx.%llx from mds%d\n",
		     inode, ceph_ino(inode), ceph_snap(inode), mds);
		spin_unlock(&inode->i_lock);
		spin_unlock(&ci->i_ceph_lock);
		goto flush_cap_releases;
	}

	/* note that each of these drops i_lock for us */
	/* note that each of these drops i_ceph_lock for us */
	switch (op) {
	case CEPH_CAP_OP_REVOKE:
	case CEPH_CAP_OP_GRANT:
@@ -2869,7 +2870,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
		break;

	default:
		spin_unlock(&inode->i_lock);
		spin_unlock(&ci->i_ceph_lock);
		pr_err("ceph_handle_caps: unknown cap op %d %s\n", op,
		       ceph_cap_op_name(op));
	}
@@ -2962,13 +2963,13 @@ void ceph_put_fmode(struct ceph_inode_info *ci, int fmode)
	struct inode *inode = &ci->vfs_inode;
	int last = 0;

	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);
	dout("put_fmode %p fmode %d %d -> %d\n", inode, fmode,
	     ci->i_nr_by_mode[fmode], ci->i_nr_by_mode[fmode]-1);
	BUG_ON(ci->i_nr_by_mode[fmode] == 0);
	if (--ci->i_nr_by_mode[fmode] == 0)
		last++;
	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);

	if (last && ci->i_vino.snap == CEPH_NOSNAP)
		ceph_check_caps(ci, 0, NULL);
@@ -2991,7 +2992,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
	int used, dirty;
	int ret = 0;

	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);
	used = __ceph_caps_used(ci);
	dirty = __ceph_caps_dirty(ci);

@@ -3046,7 +3047,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
			     inode, cap, ceph_cap_string(cap->issued));
		}
	}
	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);
	return ret;
}

@@ -3061,7 +3062,7 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,

	/*
	 * force an record for the directory caps if we have a dentry lease.
	 * this is racy (can't take i_lock and d_lock together), but it
	 * this is racy (can't take i_ceph_lock and d_lock together), but it
	 * doesn't have to be perfect; the mds will revoke anything we don't
	 * release.
	 */
+12 −12
Original line number Diff line number Diff line
@@ -281,18 +281,18 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
	}

	/* can we use the dcache? */
	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);
	if ((filp->f_pos == 2 || fi->dentry) &&
	    !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
	    ceph_snap(inode) != CEPH_SNAPDIR &&
	    ceph_dir_test_complete(inode) &&
	    __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
		spin_unlock(&inode->i_lock);
		spin_unlock(&ci->i_ceph_lock);
		err = __dcache_readdir(filp, dirent, filldir);
		if (err != -EAGAIN)
			return err;
	} else {
		spin_unlock(&inode->i_lock);
		spin_unlock(&ci->i_ceph_lock);
	}
	if (fi->dentry) {
		err = note_last_dentry(fi, fi->dentry->d_name.name,
@@ -428,12 +428,12 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
	 * were released during the whole readdir, and we should have
	 * the complete dir contents in our cache.
	 */
	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);
	if (ci->i_release_count == fi->dir_release_count) {
		ceph_dir_set_complete(inode);
		ci->i_max_offset = filp->f_pos;
	}
	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);

	dout("readdir %p filp %p done.\n", inode, filp);
	return 0;
@@ -607,7 +607,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
		struct ceph_inode_info *ci = ceph_inode(dir);
		struct ceph_dentry_info *di = ceph_dentry(dentry);

		spin_lock(&dir->i_lock);
		spin_lock(&ci->i_ceph_lock);
		dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags);
		if (strncmp(dentry->d_name.name,
			    fsc->mount_options->snapdir_name,
@@ -615,13 +615,13 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
		    !is_root_ceph_dentry(dir, dentry) &&
		    ceph_dir_test_complete(dir) &&
		    (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
			spin_unlock(&dir->i_lock);
			spin_unlock(&ci->i_ceph_lock);
			dout(" dir %p complete, -ENOENT\n", dir);
			d_add(dentry, NULL);
			di->lease_shared_gen = ci->i_shared_gen;
			return NULL;
		}
		spin_unlock(&dir->i_lock);
		spin_unlock(&ci->i_ceph_lock);
	}

	op = ceph_snap(dir) == CEPH_SNAPDIR ?
@@ -841,12 +841,12 @@ static int drop_caps_for_unlink(struct inode *inode)
	struct ceph_inode_info *ci = ceph_inode(inode);
	int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;

	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);
	if (inode->i_nlink == 1) {
		drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN);
		ci->i_ceph_flags |= CEPH_I_NODELAY;
	}
	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);
	return drop;
}

@@ -1015,10 +1015,10 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
	struct ceph_dentry_info *di = ceph_dentry(dentry);
	int valid = 0;

	spin_lock(&dir->i_lock);
	spin_lock(&ci->i_ceph_lock);
	if (ci->i_shared_gen == di->lease_shared_gen)
		valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1);
	spin_unlock(&dir->i_lock);
	spin_unlock(&ci->i_ceph_lock);
	dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n",
	     dir, (unsigned)ci->i_shared_gen, dentry,
	     (unsigned)di->lease_shared_gen, valid);
+10 −10
Original line number Diff line number Diff line
@@ -147,9 +147,9 @@ int ceph_open(struct inode *inode, struct file *file)

	/* trivially open snapdir */
	if (ceph_snap(inode) == CEPH_SNAPDIR) {
		spin_lock(&inode->i_lock);
		spin_lock(&ci->i_ceph_lock);
		__ceph_get_fmode(ci, fmode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&ci->i_ceph_lock);
		return ceph_init_file(inode, file, fmode);
	}

@@ -158,7 +158,7 @@ int ceph_open(struct inode *inode, struct file *file)
	 * write) or any MDS (for read).  Update wanted set
	 * asynchronously.
	 */
	spin_lock(&inode->i_lock);
	spin_lock(&ci->i_ceph_lock);
	if (__ceph_is_any_real_caps(ci) &&
	    (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
		int mds_wanted = __ceph_caps_mds_wanted(ci);
@@ -168,7 +168,7 @@ int ceph_open(struct inode *inode, struct file *file)
		     inode, fmode, ceph_cap_string(wanted),
		     ceph_cap_string(issued));
		__ceph_get_fmode(ci, fmode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&ci->i_ceph_lock);

		/* adjust wanted? */
		if ((issued & wanted) != wanted &&
@@ -180,10 +180,10 @@ int ceph_open(struct inode *inode, struct file *file)
	} else if (ceph_snap(inode) != CEPH_NOSNAP &&
		   (ci->i_snap_caps & wanted) == wanted) {
		__ceph_get_fmode(ci, fmode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&ci->i_ceph_lock);
		return ceph_init_file(inode, file, fmode);
	}
	spin_unlock(&inode->i_lock);
	spin_unlock(&ci->i_ceph_lock);

	dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted));
	req = prepare_open_request(inode->i_sb, flags, 0);
@@ -743,9 +743,9 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
		 */
		int dirty;

		spin_lock(&inode->i_lock);
		spin_lock(&ci->i_ceph_lock);
		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
		spin_unlock(&inode->i_lock);
		spin_unlock(&ci->i_ceph_lock);
		ceph_put_cap_refs(ci, got);

		ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
@@ -764,9 +764,9 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,

	if (ret >= 0) {
		int dirty;
		spin_lock(&inode->i_lock);
		spin_lock(&ci->i_ceph_lock);
		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
		spin_unlock(&inode->i_lock);
		spin_unlock(&ci->i_ceph_lock);
		if (dirty)
			__mark_inode_dirty(inode, dirty);
	}
Loading