Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 155cc6b7 authored by David Chinner's avatar David Chinner Committed by Lachlan McIlroy
Browse files

[XFS] Use atomics for iclog reference counting



Now that we update the log tail LSN less frequently on transaction
completion, we pass the contention straight to the global log state lock
(l_iclog_lock) during transaction completion.

We currently have to take this lock to decrement the iclog reference
count. There is a reference count on each iclog, so we need to take the
global lock for all refcount changes.

When large numbers of processes are all doing small transactions, the iclog
reference counts will be quite high, and the state change that absolutely
requires the l_iclog_lock is the exception rather than the norm.

Change the reference counting on the iclogs to use atomic_inc/dec so that
we can use atomic_dec_and_lock during transaction completion and avoid the
need for grabbing the l_iclog_lock for every reference count decrement
except the one that matters - the last.

SGI-PV: 975671
SGI-Modid: xfs-linux-melb:xfs-kern:30505a

Signed-off-by: default avatarDavid Chinner <dgc@sgi.com>
Signed-off-by: default avatarTim Shimmin <tes@sgi.com>
Signed-off-by: default avatarLachlan McIlroy <lachlan@sgi.com>
parent b589334c
Loading
Loading
Loading
Loading
+20 −16
Original line number Original line Diff line number Diff line
@@ -675,7 +675,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)


		spin_lock(&log->l_icloglock);
		spin_lock(&log->l_icloglock);
		iclog = log->l_iclog;
		iclog = log->l_iclog;
		iclog->ic_refcnt++;
		atomic_inc(&iclog->ic_refcnt);
		spin_unlock(&log->l_icloglock);
		spin_unlock(&log->l_icloglock);
		xlog_state_want_sync(log, iclog);
		xlog_state_want_sync(log, iclog);
		(void) xlog_state_release_iclog(log, iclog);
		(void) xlog_state_release_iclog(log, iclog);
@@ -713,7 +713,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
		 */
		 */
		spin_lock(&log->l_icloglock);
		spin_lock(&log->l_icloglock);
		iclog = log->l_iclog;
		iclog = log->l_iclog;
		iclog->ic_refcnt++;
		atomic_inc(&iclog->ic_refcnt);
		spin_unlock(&log->l_icloglock);
		spin_unlock(&log->l_icloglock);


		xlog_state_want_sync(log, iclog);
		xlog_state_want_sync(log, iclog);
@@ -1405,7 +1405,7 @@ xlog_sync(xlog_t *log,
	int		v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb);
	int		v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb);


	XFS_STATS_INC(xs_log_writes);
	XFS_STATS_INC(xs_log_writes);
	ASSERT(iclog->ic_refcnt == 0);
	ASSERT(atomic_read(&iclog->ic_refcnt) == 0);


	/* Add for LR header */
	/* Add for LR header */
	count_init = log->l_iclog_hsize + iclog->ic_offset;
	count_init = log->l_iclog_hsize + iclog->ic_offset;
@@ -2309,7 +2309,7 @@ xlog_state_done_syncing(


	ASSERT(iclog->ic_state == XLOG_STATE_SYNCING ||
	ASSERT(iclog->ic_state == XLOG_STATE_SYNCING ||
	       iclog->ic_state == XLOG_STATE_IOERROR);
	       iclog->ic_state == XLOG_STATE_IOERROR);
	ASSERT(iclog->ic_refcnt == 0);
	ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
	ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2);
	ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2);




@@ -2391,7 +2391,7 @@ xlog_state_get_iclog_space(xlog_t *log,
	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
	head = &iclog->ic_header;
	head = &iclog->ic_header;


	iclog->ic_refcnt++;			/* prevents sync */
	atomic_inc(&iclog->ic_refcnt);	/* prevents sync */
	log_offset = iclog->ic_offset;
	log_offset = iclog->ic_offset;


	/* On the 1st write to an iclog, figure out lsn.  This works
	/* On the 1st write to an iclog, figure out lsn.  This works
@@ -2423,12 +2423,12 @@ xlog_state_get_iclog_space(xlog_t *log,
		xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
		xlog_state_switch_iclogs(log, iclog, iclog->ic_size);


		/* If I'm the only one writing to this iclog, sync it to disk */
		/* If I'm the only one writing to this iclog, sync it to disk */
		if (iclog->ic_refcnt == 1) {
		if (atomic_read(&iclog->ic_refcnt) == 1) {
			spin_unlock(&log->l_icloglock);
			spin_unlock(&log->l_icloglock);
			if ((error = xlog_state_release_iclog(log, iclog)))
			if ((error = xlog_state_release_iclog(log, iclog)))
				return error;
				return error;
		} else {
		} else {
			iclog->ic_refcnt--;
			atomic_dec(&iclog->ic_refcnt);
			spin_unlock(&log->l_icloglock);
			spin_unlock(&log->l_icloglock);
		}
		}
		goto restart;
		goto restart;
@@ -2819,18 +2819,21 @@ xlog_state_release_iclog(
{
{
	int		sync = 0;	/* do we sync? */
	int		sync = 0;	/* do we sync? */


	spin_lock(&log->l_icloglock);
	if (iclog->ic_state & XLOG_STATE_IOERROR)
		return XFS_ERROR(EIO);

	ASSERT(atomic_read(&iclog->ic_refcnt) > 0);
	if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock))
		return 0;

	if (iclog->ic_state & XLOG_STATE_IOERROR) {
	if (iclog->ic_state & XLOG_STATE_IOERROR) {
		spin_unlock(&log->l_icloglock);
		spin_unlock(&log->l_icloglock);
		return XFS_ERROR(EIO);
		return XFS_ERROR(EIO);
	}
	}

	ASSERT(iclog->ic_refcnt > 0);
	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE ||
	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE ||
	       iclog->ic_state == XLOG_STATE_WANT_SYNC);
	       iclog->ic_state == XLOG_STATE_WANT_SYNC);


	if (--iclog->ic_refcnt == 0 &&
	if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
	    iclog->ic_state == XLOG_STATE_WANT_SYNC) {
		/* update tail before writing to iclog */
		/* update tail before writing to iclog */
		xlog_assign_tail_lsn(log->l_mp);
		xlog_assign_tail_lsn(log->l_mp);
		sync++;
		sync++;
@@ -2950,7 +2953,8 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
		 * previous iclog and go to sleep.
		 * previous iclog and go to sleep.
		 */
		 */
		if (iclog->ic_state == XLOG_STATE_DIRTY ||
		if (iclog->ic_state == XLOG_STATE_DIRTY ||
		    (iclog->ic_refcnt == 0 && iclog->ic_offset == 0)) {
		    (atomic_read(&iclog->ic_refcnt) == 0
		     && iclog->ic_offset == 0)) {
			iclog = iclog->ic_prev;
			iclog = iclog->ic_prev;
			if (iclog->ic_state == XLOG_STATE_ACTIVE ||
			if (iclog->ic_state == XLOG_STATE_ACTIVE ||
			    iclog->ic_state == XLOG_STATE_DIRTY)
			    iclog->ic_state == XLOG_STATE_DIRTY)
@@ -2958,14 +2962,14 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
			else
			else
				goto maybe_sleep;
				goto maybe_sleep;
		} else {
		} else {
			if (iclog->ic_refcnt == 0) {
			if (atomic_read(&iclog->ic_refcnt) == 0) {
				/* We are the only one with access to this
				/* We are the only one with access to this
				 * iclog.  Flush it out now.  There should
				 * iclog.  Flush it out now.  There should
				 * be a roundoff of zero to show that someone
				 * be a roundoff of zero to show that someone
				 * has already taken care of the roundoff from
				 * has already taken care of the roundoff from
				 * the previous sync.
				 * the previous sync.
				 */
				 */
				iclog->ic_refcnt++;
				atomic_inc(&iclog->ic_refcnt);
				lsn = be64_to_cpu(iclog->ic_header.h_lsn);
				lsn = be64_to_cpu(iclog->ic_header.h_lsn);
				xlog_state_switch_iclogs(log, iclog, 0);
				xlog_state_switch_iclogs(log, iclog, 0);
				spin_unlock(&log->l_icloglock);
				spin_unlock(&log->l_icloglock);
@@ -3097,7 +3101,7 @@ xlog_state_sync(xlog_t *log,
			already_slept = 1;
			already_slept = 1;
			goto try_again;
			goto try_again;
		} else {
		} else {
			iclog->ic_refcnt++;
			atomic_inc(&iclog->ic_refcnt);
			xlog_state_switch_iclogs(log, iclog, 0);
			xlog_state_switch_iclogs(log, iclog, 0);
			spin_unlock(&log->l_icloglock);
			spin_unlock(&log->l_icloglock);
			if (xlog_state_release_iclog(log, iclog))
			if (xlog_state_release_iclog(log, iclog))
+1 −1
Original line number Original line Diff line number Diff line
@@ -339,7 +339,7 @@ typedef struct xlog_iclog_fields {
#endif
#endif
	int			ic_size;
	int			ic_size;
	int			ic_offset;
	int			ic_offset;
	int			ic_refcnt;
	atomic_t		ic_refcnt;
	int			ic_bwritecnt;
	int			ic_bwritecnt;
	ushort_t		ic_state;
	ushort_t		ic_state;
	char			*ic_datap;	/* pointer to iclog data */
	char			*ic_datap;	/* pointer to iclog data */