Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 114d23aa authored by David Chinner, committed by Lachlan McIlroy
Browse files

[XFS] Per iclog callback chain lock



Rather than use the icloglock for protecting the iclog completion callback
chain, use a new per-iclog lock so that walking the callback chain doesn't
require holding a global lock.

This reduces contention on the icloglock during transaction commit and log
I/O completion by reducing the number of times we need to hold the global
icloglock during these operations.

SGI-PV: 978729
SGI-Modid: xfs-linux-melb:xfs-kern:30770a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
parent 2abdb8c8
Loading
Loading
Loading
Loading
+19 −16
Original line number Diff line number Diff line
@@ -397,12 +397,10 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */
	       void		  *iclog_hndl,	/* iclog to hang callback off */
	       xfs_log_callback_t *cb)
{
	xlog_t *log = mp->m_log;
	xlog_in_core_t	  *iclog = (xlog_in_core_t *)iclog_hndl;
	int	abortflg;

	cb->cb_next = NULL;
	spin_lock(&log->l_icloglock);
	spin_lock(&iclog->ic_callback_lock);
	abortflg = (iclog->ic_state & XLOG_STATE_IOERROR);
	if (!abortflg) {
		ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) ||
@@ -411,7 +409,7 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */
		*(iclog->ic_callback_tail) = cb;
		iclog->ic_callback_tail = &(cb->cb_next);
	}
	spin_unlock(&log->l_icloglock);
	spin_unlock(&iclog->ic_callback_lock);
	return abortflg;
}	/* xfs_log_notify */

@@ -1257,6 +1255,8 @@ xlog_alloc_log(xfs_mount_t *mp,
		iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
		iclog->ic_state = XLOG_STATE_ACTIVE;
		iclog->ic_log = log;
		atomic_set(&iclog->ic_refcnt, 0);
		spin_lock_init(&iclog->ic_callback_lock);
		iclog->ic_callback_tail = &(iclog->ic_callback);
		iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize;

@@ -1987,7 +1987,7 @@ xlog_state_clean_log(xlog_t *log)
		if (iclog->ic_state == XLOG_STATE_DIRTY) {
			iclog->ic_state	= XLOG_STATE_ACTIVE;
			iclog->ic_offset       = 0;
			iclog->ic_callback	= NULL;   /* don't need to free */
			ASSERT(iclog->ic_callback == NULL);
			/*
			 * If the number of ops in this iclog indicate it just
			 * contains the dummy transaction, we can
@@ -2190,37 +2190,40 @@ xlog_state_do_callback(
					be64_to_cpu(iclog->ic_header.h_lsn);
				spin_unlock(&log->l_grant_lock);

				/*
				 * Keep processing entries in the callback list
				 * until we come around and it is empty.  We
				 * need to atomically see that the list is
				 * empty and change the state to DIRTY so that
				 * we don't miss any more callbacks being added.
				 */
				spin_lock(&log->l_icloglock);
			} else {
				spin_unlock(&log->l_icloglock);
				ioerrors++;
			}
			cb = iclog->ic_callback;

			/*
			 * Keep processing entries in the callback list until
			 * we come around and it is empty.  We need to
			 * atomically see that the list is empty and change the
			 * state to DIRTY so that we don't miss any more
			 * callbacks being added.
			 */
			spin_lock(&iclog->ic_callback_lock);
			cb = iclog->ic_callback;
			while (cb) {
				iclog->ic_callback_tail = &(iclog->ic_callback);
				iclog->ic_callback = NULL;
				spin_unlock(&log->l_icloglock);
				spin_unlock(&iclog->ic_callback_lock);

				/* perform callbacks in the order given */
				for (; cb; cb = cb_next) {
					cb_next = cb->cb_next;
					cb->cb_func(cb->cb_arg, aborted);
				}
				spin_lock(&log->l_icloglock);
				spin_lock(&iclog->ic_callback_lock);
				cb = iclog->ic_callback;
			}

			loopdidcallbacks++;
			funcdidcallbacks++;

			spin_lock(&log->l_icloglock);
			ASSERT(iclog->ic_callback == NULL);
			spin_unlock(&iclog->ic_callback_lock);
			if (!(iclog->ic_state & XLOG_STATE_IOERROR))
				iclog->ic_state = XLOG_STATE_DIRTY;

+26 −7
Original line number Diff line number Diff line
@@ -324,6 +324,19 @@ typedef struct xlog_rec_ext_header {
 * - ic_offset is the current number of bytes written to in this iclog.
 * - ic_refcnt is bumped when someone is writing to the log.
 * - ic_state is the state of the iclog.
 *
 * Because of cacheline contention on large machines, we need to separate
 * various resources onto different cachelines. To start with, make the
 * structure cacheline aligned. The following fields can be contended on
 * by independent processes:
 *
 *	- ic_callback_*
 *	- ic_refcnt
 *	- fields protected by the global l_icloglock
 *
 * so we need to ensure that these fields are located in separate cachelines.
 * We'll put all the read-only and l_icloglock fields in the first cacheline,
 * and move everything else out to subsequent cachelines.
 */
typedef struct xlog_iclog_fields {
	sv_t			ic_forcesema;
@@ -332,18 +345,23 @@ typedef struct xlog_iclog_fields {
	struct xlog_in_core	*ic_prev;
	struct xfs_buf		*ic_bp;
	struct log		*ic_log;
	xfs_log_callback_t	*ic_callback;
	xfs_log_callback_t	**ic_callback_tail;
#ifdef XFS_LOG_TRACE
	struct ktrace		*ic_trace;
#endif
	int			ic_size;
	int			ic_offset;
	atomic_t		ic_refcnt;
	int			ic_bwritecnt;
	ushort_t		ic_state;
	char			*ic_datap;	/* pointer to iclog data */
} xlog_iclog_fields_t;
#ifdef XFS_LOG_TRACE
	struct ktrace		*ic_trace;
#endif

	/* Callback structures need their own cacheline */
	spinlock_t		ic_callback_lock ____cacheline_aligned_in_smp;
	xfs_log_callback_t	*ic_callback;
	xfs_log_callback_t	**ic_callback_tail;

	/* reference counts need their own cacheline */
	atomic_t		ic_refcnt ____cacheline_aligned_in_smp;
} xlog_iclog_fields_t ____cacheline_aligned_in_smp;

typedef union xlog_in_core2 {
	xlog_rec_header_t	hic_header;
@@ -366,6 +384,7 @@ typedef struct xlog_in_core {
#define	ic_bp		hic_fields.ic_bp
#define	ic_log		hic_fields.ic_log
#define	ic_callback	hic_fields.ic_callback
#define	ic_callback_lock hic_fields.ic_callback_lock
#define	ic_callback_tail hic_fields.ic_callback_tail
#define	ic_trace	hic_fields.ic_trace
#define	ic_size		hic_fields.ic_size