Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 474fce06 authored by Christoph Hellwig, committed by Ben Myers
Browse files

xfs: replace i_flock with a sleeping bitlock



We almost never block on i_flock, the exception is synchronous inode
flushing.  Instead of bloating the inode with a 16/24-byte completion
that we abuse as a semaphore just implement it as a bitlock that uses
a bit waitqueue for the rare sleeping path.  This primarily is a
tradeoff between a much smaller inode and a faster non-blocking
path vs faster wakeups, and we are much better off with the former.

A small downside is that we will lose lockdep checking for i_flock, but
given that it's always taken inside the ilock that should be acceptable.

Note that for example the inode writeback locking is implemented in a
very similar way.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Alex Elder <aelder@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
parent 49e4c70e
Loading
Loading
Loading
Loading
+18 −2
Original line number Original line Diff line number Diff line
@@ -77,7 +77,7 @@ xfs_inode_alloc(


	ASSERT(atomic_read(&ip->i_pincount) == 0);
	ASSERT(atomic_read(&ip->i_pincount) == 0);
	ASSERT(!spin_is_locked(&ip->i_flags_lock));
	ASSERT(!spin_is_locked(&ip->i_flags_lock));
	ASSERT(completion_done(&ip->i_flush));
	ASSERT(!xfs_isiflocked(ip));
	ASSERT(ip->i_ino == 0);
	ASSERT(ip->i_ino == 0);


	mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
	mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
@@ -150,7 +150,7 @@ xfs_inode_free(
	/* asserts to verify all state is correct here */
	/* asserts to verify all state is correct here */
	ASSERT(atomic_read(&ip->i_pincount) == 0);
	ASSERT(atomic_read(&ip->i_pincount) == 0);
	ASSERT(!spin_is_locked(&ip->i_flags_lock));
	ASSERT(!spin_is_locked(&ip->i_flags_lock));
	ASSERT(completion_done(&ip->i_flush));
	ASSERT(!xfs_isiflocked(ip));


	/*
	/*
	 * Because we use RCU freeing we need to ensure the inode always
	 * Because we use RCU freeing we need to ensure the inode always
@@ -713,3 +713,19 @@ xfs_isilocked(
	return 0;
	return 0;
}
}
#endif
#endif

void
__xfs_iflock(
	struct xfs_inode	*ip)
{
	wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
	DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);

	do {
		prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
		if (xfs_isiflocked(ip))
			io_schedule();
	} while (!xfs_iflock_nowait(ip));

	finish_wait(wq, &wait.wait);
}
+2 −2
Original line number Original line Diff line number Diff line
@@ -2396,7 +2396,7 @@ xfs_iflush(
	XFS_STATS_INC(xs_iflush_count);
	XFS_STATS_INC(xs_iflush_count);


	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
	ASSERT(!completion_done(&ip->i_flush));
	ASSERT(xfs_isiflocked(ip));
	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
	       ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
	       ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));


@@ -2512,7 +2512,7 @@ xfs_iflush_int(
#endif
#endif


	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
	ASSERT(!completion_done(&ip->i_flush));
	ASSERT(xfs_isiflocked(ip));
	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
	       ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
	       ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));


+50 −28
Original line number Original line Diff line number Diff line
@@ -237,7 +237,6 @@ typedef struct xfs_inode {
	struct xfs_inode_log_item *i_itemp;	/* logging information */
	struct xfs_inode_log_item *i_itemp;	/* logging information */
	mrlock_t		i_lock;		/* inode lock */
	mrlock_t		i_lock;		/* inode lock */
	mrlock_t		i_iolock;	/* inode IO lock */
	mrlock_t		i_iolock;	/* inode IO lock */
	struct completion	i_flush;	/* inode flush completion q */
	atomic_t		i_pincount;	/* inode pin count */
	atomic_t		i_pincount;	/* inode pin count */
	wait_queue_head_t	i_ipin_wait;	/* inode pinning wait queue */
	wait_queue_head_t	i_ipin_wait;	/* inode pinning wait queue */
	spinlock_t		i_flags_lock;	/* inode i_flags lock */
	spinlock_t		i_flags_lock;	/* inode i_flags lock */
@@ -324,6 +323,19 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
	return ret;
	return ret;
}
}


/*
 * Test-and-set helper for ip->i_flags, performed with i_flags_lock held.
 *
 * If none of the bits in @flags are currently set, all of them are set and
 * zero is returned.  Otherwise i_flags is left untouched and the subset of
 * @flags that was already set is returned.
 */
static inline int
xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned short flags)
{
	int already_set;

	spin_lock(&ip->i_flags_lock);
	already_set = ip->i_flags & flags;
	if (already_set == 0)
		ip->i_flags |= flags;
	spin_unlock(&ip->i_flags_lock);

	return already_set;
}

/*
/*
 * Project quota id helpers (previously projid was 16bit only
 * Project quota id helpers (previously projid was 16bit only
 * and using two 16bit values to hold new 32bit projid was chosen
 * and using two 16bit values to hold new 32bit projid was chosen
@@ -343,36 +355,18 @@ xfs_set_projid(struct xfs_inode *ip,
	ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff);
	ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff);
}
}


/*
 * Manage the i_flush queue embedded in the inode.  This completion
 * queue synchronizes processes attempting to flush the in-core
 * inode back to disk.
 */
/*
 * Completion-based flush lock acquire (the variant removed by this change):
 * calls wait_for_completion() on the inode's i_flush completion.
 */
static inline void xfs_iflock(xfs_inode_t *ip)
{
	wait_for_completion(&ip->i_flush);
}

/*
 * Completion-based flush lock try-acquire (the variant removed by this
 * change): returns the result of try_wait_for_completion() on the inode's
 * i_flush completion.
 */
static inline int xfs_iflock_nowait(xfs_inode_t *ip)
{
	return try_wait_for_completion(&ip->i_flush);
}

/*
 * Completion-based flush lock release (the variant removed by this change):
 * calls complete() on the inode's i_flush completion.
 */
static inline void xfs_ifunlock(xfs_inode_t *ip)
{
	complete(&ip->i_flush);
}

/*
/*
 * In-core inode flags.
 * In-core inode flags.
 */
 */
#define XFS_IRECLAIM		0x0001  /* started reclaiming this inode */
#define XFS_IRECLAIM		(1 << 0) /* started reclaiming this inode */
#define XFS_ISTALE		0x0002	/* inode has been staled */
#define XFS_ISTALE		(1 << 1) /* inode has been staled */
#define XFS_IRECLAIMABLE	0x0004	/* inode can be reclaimed */
#define XFS_IRECLAIMABLE	(1 << 2) /* inode can be reclaimed */
#define XFS_INEW		0x0008	/* inode has just been allocated */
#define XFS_INEW		(1 << 3) /* inode has just been allocated */
#define XFS_IFILESTREAM		0x0010	/* inode is in a filestream directory */
#define XFS_IFILESTREAM		(1 << 4) /* inode is in a filestream dir. */
#define XFS_ITRUNCATED		0x0020	/* truncated down so flush-on-close */
#define XFS_ITRUNCATED		(1 << 5) /* truncated down so flush-on-close */
#define XFS_IDIRTY_RELEASE	0x0040	/* dirty release already seen */
#define XFS_IDIRTY_RELEASE	(1 << 6) /* dirty release already seen */
#define __XFS_IFLOCK_BIT	7	 /* inode is being flushed right now */
#define XFS_IFLOCK		(1 << __XFS_IFLOCK_BIT)


/*
/*
 * Per-lifetime flags need to be reset when re-using a reclaimable inode during
 * Per-lifetime flags need to be reset when re-using a reclaimable inode during
@@ -384,6 +378,34 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
	 XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \
	 XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \
	 XFS_IFILESTREAM);
	 XFS_IFILESTREAM);


/*
 * Synchronize processes attempting to flush the in-core inode back to disk.
 */

extern void __xfs_iflock(struct xfs_inode *ip);

/*
 * Try to take the inode flush lock without sleeping.
 *
 * Returns 1 if XFS_IFLOCK was not set and has now been set by this call,
 * 0 if it was already set.
 */
static inline int xfs_iflock_nowait(struct xfs_inode *ip)
{
	return xfs_iflags_test_and_set(ip, XFS_IFLOCK) == 0;
}

/*
 * Take the inode flush lock.
 *
 * First attempts the non-sleeping acquire; only if that fails does it fall
 * back to the out-of-line __xfs_iflock().
 */
static inline void xfs_iflock(struct xfs_inode *ip)
{
	if (xfs_iflock_nowait(ip))
		return;

	__xfs_iflock(ip);
}

/*
 * Release the inode flush lock: clear XFS_IFLOCK in ip->i_flags, then issue
 * a wakeup on the bit waitqueue for __XFS_IFLOCK_BIT.
 *
 * The flag is cleared before the wakeup is issued; the two calls must stay
 * in this order.
 *
 * NOTE(review): wake_up_bit() is documented as needing a memory barrier
 * between the bit clear and the call.  Whether xfs_iflags_clear() already
 * provides sufficient ordering is not visible from here -- confirm.
 */
static inline void xfs_ifunlock(struct xfs_inode *ip)
{
	xfs_iflags_clear(ip, XFS_IFLOCK);
	wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
}

/*
 * Report whether the inode flush lock is currently held, i.e. whether
 * XFS_IFLOCK tests as set in the inode flags.  Returns the result of
 * xfs_iflags_test() for that flag.
 */
static inline int xfs_isiflocked(struct xfs_inode *ip)
{
	int flush_locked = xfs_iflags_test(ip, XFS_IFLOCK);

	return flush_locked;
}

/*
/*
 * Flags for inode locking.
 * Flags for inode locking.
 * Bit ranges:	1<<1  - 1<<16-1 -- iolock/ilock modes (bitfield)
 * Bit ranges:	1<<1  - 1<<16-1 -- iolock/ilock modes (bitfield)
+2 −2
Original line number Original line Diff line number Diff line
@@ -717,7 +717,7 @@ xfs_inode_item_pushbuf(
	 * If a flush is not in progress anymore, chances are that the
	 * If a flush is not in progress anymore, chances are that the
	 * inode was taken off the AIL. So, just get out.
	 * inode was taken off the AIL. So, just get out.
	 */
	 */
	if (completion_done(&ip->i_flush) ||
	if (!xfs_isiflocked(ip) ||
	    !(lip->li_flags & XFS_LI_IN_AIL)) {
	    !(lip->li_flags & XFS_LI_IN_AIL)) {
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		return true;
		return true;
@@ -750,7 +750,7 @@ xfs_inode_item_push(
	struct xfs_inode	*ip = iip->ili_inode;
	struct xfs_inode	*ip = iip->ili_inode;


	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
	ASSERT(!completion_done(&ip->i_flush));
	ASSERT(xfs_isiflocked(ip));


	/*
	/*
	 * Since we were able to lock the inode's flush lock and
	 * Since we were able to lock the inode's flush lock and
+0 −7
Original line number Original line Diff line number Diff line
@@ -829,13 +829,6 @@ xfs_fs_inode_init_once(
	atomic_set(&ip->i_pincount, 0);
	atomic_set(&ip->i_pincount, 0);
	spin_lock_init(&ip->i_flags_lock);
	spin_lock_init(&ip->i_flags_lock);
	init_waitqueue_head(&ip->i_ipin_wait);
	init_waitqueue_head(&ip->i_ipin_wait);
	/*
	 * Because we want to use a counting completion, complete
	 * the flush completion once to allow a single access to
	 * the flush completion without blocking.
	 */
	init_completion(&ip->i_flush);
	complete(&ip->i_flush);


	mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
	mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
		     "xfsino", ip->i_ino);
		     "xfsino", ip->i_ino);
Loading