Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 600aa109, authored by Dan Williams, committed by Neil Brown
Browse files

md: replace STRIPE_OP_{BIODRAIN,PREXOR,POSTXOR} with 'reconstruct_states'



From: Dan Williams <dan.j.williams@intel.com>

Track the state of reconstruct operations (recalculating the parity block,
usually due to incoming writes or as part of array expansion).  This reduces
the scope of the STRIPE_OP_{BIODRAIN,PREXOR,POSTXOR} flags to only tracking
whether a reconstruct operation has been requested via the ops_request field
of struct stripe_head_state.

This is the final step in the removal of ops.{pending,ack,complete,count}, i.e.
the STRIPE_OP_{BIODRAIN,PREXOR,POSTXOR} flags only request an operation and do
not track the state of the operation.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Neil Brown <neilb@suse.de>
parent 976ea8d4
Loading
Loading
Loading
Loading
+62 −142
Original line number Diff line number Diff line
@@ -122,6 +122,13 @@ static void return_io(struct bio *return_bi)

static void print_raid5_conf (raid5_conf_t *conf);

/* stripe_operations_active() - report whether any stripe operation is
 * outstanding on @sh.
 *
 * Returns 1 when check_state or reconstruct_state is non-zero, or when
 * either STRIPE_BIOFILL_RUN or STRIPE_COMPUTE_RUN is set in sh->state;
 * returns 0 otherwise.  The conditions are evaluated in that order and
 * evaluation stops at the first one that holds.
 */
static int stripe_operations_active(struct stripe_head *sh)
{
	/* state-machine fields first: non-zero means a check or
	 * reconstruct is somewhere between request and result
	 */
	if (sh->check_state)
		return 1;
	if (sh->reconstruct_state)
		return 1;

	/* then the per-operation run flags kept in sh->state */
	if (test_bit(STRIPE_BIOFILL_RUN, &sh->state))
		return 1;
	if (test_bit(STRIPE_COMPUTE_RUN, &sh->state))
		return 1;

	return 0;
}

static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
{
	if (atomic_dec_and_test(&sh->count)) {
@@ -141,7 +148,7 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
			}
			md_wakeup_thread(conf->mddev->thread);
		} else {
			BUG_ON(sh->ops.pending);
			BUG_ON(stripe_operations_active(sh));
			if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
				atomic_dec(&conf->preread_active_stripes);
				if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
@@ -243,7 +250,7 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int

	BUG_ON(atomic_read(&sh->count) != 0);
	BUG_ON(test_bit(STRIPE_HANDLE, &sh->state));
	BUG_ON(sh->ops.pending || sh->ops.ack || sh->ops.complete);
	BUG_ON(stripe_operations_active(sh));

	CHECK_DEVLOCK();
	pr_debug("init_stripe called, stripe %llu\n",
@@ -344,47 +351,6 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
	return sh;
}

/* test_and_ack_op() ensures that we only dequeue an operation once
 *
 * @op:   operation bit number to examine
 * @pend: local copy of the pending mask; the bit for @op is cleared from it
 *        whenever the operation must not be (re)issued by the caller
 *
 * If @op is set in sh->ops.pending and not yet set in sh->ops.complete, the
 * macro test-and-sets the bit in sh->ops.ack: when the bit was already set
 * there, @op is cleared from @pend; when it was not, 'ack' is incremented
 * and @op stays set in @pend.  If @op is not pending, or is already
 * complete, it is simply cleared from @pend.
 *
 * NOTE: this macro is not hygienic - it references the identifiers 'sh'
 * (a struct stripe_head pointer) and 'ack' (an integer counter) directly,
 * so both must be in scope under exactly those names at the expansion site.
 */
#define test_and_ack_op(op, pend) \
do {							\
	if (test_bit(op, &sh->ops.pending) &&		\
		!test_bit(op, &sh->ops.complete)) {	\
		if (test_and_set_bit(op, &sh->ops.ack)) \
			clear_bit(op, &pend);		\
		else					\
			ack++;				\
	} else						\
		clear_bit(op, &pend);			\
} while (0)

/* find new work to run, do not resubmit work that is already
 * in flight
 *
 * Copies sh->ops.pending into a local mask and passes every operation type
 * through test_and_ack_op(), which clears from the local mask each bit that
 * is not pending, is already complete, or was already acked.  The number of
 * operations newly acked by this call is subtracted from sh->ops.count.
 *
 * Returns the filtered mask.
 */
static unsigned long get_stripe_work(struct stripe_head *sh)
{
	unsigned long pending;
	/* 'ack' is incremented by test_and_ack_op(), which refers to it
	 * (and to 'sh') by name - do not rename either
	 */
	int ack = 0;

	pending = sh->ops.pending;

	test_and_ack_op(STRIPE_OP_BIOFILL, pending);
	test_and_ack_op(STRIPE_OP_COMPUTE_BLK, pending);
	test_and_ack_op(STRIPE_OP_PREXOR, pending);
	test_and_ack_op(STRIPE_OP_BIODRAIN, pending);
	test_and_ack_op(STRIPE_OP_POSTXOR, pending);
	test_and_ack_op(STRIPE_OP_CHECK, pending);

	/* a negative count means more operations were acked than the
	 * counter recorded; dump the three ops masks and stop
	 */
	sh->ops.count -= ack;
	if (unlikely(sh->ops.count < 0)) {
		printk(KERN_ERR "pending: %#lx ops.pending: %#lx ops.ack: %#lx "
			"ops.complete: %#lx\n", pending, sh->ops.pending,
			sh->ops.ack, sh->ops.complete);
		BUG();
	}

	return pending;
}

static void
raid5_end_read_request(struct bio *bi, int error);
static void
@@ -609,7 +575,7 @@ static void ops_complete_compute5(void *stripe_head_ref)
}

static struct dma_async_tx_descriptor *
ops_run_compute5(struct stripe_head *sh, unsigned long pending)
ops_run_compute5(struct stripe_head *sh, unsigned long ops_request)
{
	/* kernel stack size limits the total number of disks */
	int disks = sh->disks;
@@ -640,7 +606,7 @@ ops_run_compute5(struct stripe_head *sh, unsigned long pending)
			ops_complete_compute5, sh);

	/* ack now if postxor is not set to be run */
	if (tx && !test_bit(STRIPE_OP_POSTXOR, &pending))
	if (tx && !test_bit(STRIPE_OP_POSTXOR, &ops_request))
		async_tx_ack(tx);

	return tx;
@@ -652,8 +618,6 @@ static void ops_complete_prexor(void *stripe_head_ref)

	pr_debug("%s: stripe %llu\n", __func__,
		(unsigned long long)sh->sector);

	set_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
}

static struct dma_async_tx_descriptor *
@@ -686,7 +650,7 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)

static struct dma_async_tx_descriptor *
ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
		 unsigned long pending)
		 unsigned long ops_request)
{
	int disks = sh->disks;
	int pd_idx = sh->pd_idx, i;
@@ -694,7 +658,7 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
	/* check if prexor is active which means only process blocks
	 * that are part of a read-modify-write (Wantprexor)
	 */
	int prexor = test_bit(STRIPE_OP_PREXOR, &pending);
	int prexor = test_bit(STRIPE_OP_PREXOR, &ops_request);

	pr_debug("%s: stripe %llu\n", __func__,
		(unsigned long long)sh->sector);
@@ -744,7 +708,7 @@ static void ops_complete_postxor(void *stripe_head_ref)
	pr_debug("%s: stripe %llu\n", __func__,
		(unsigned long long)sh->sector);

	set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
	sh->reconstruct_state = reconstruct_state_result;
	set_bit(STRIPE_HANDLE, &sh->state);
	release_stripe(sh);
}
@@ -763,16 +727,14 @@ static void ops_complete_write(void *stripe_head_ref)
			set_bit(R5_UPTODATE, &dev->flags);
	}

	set_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete);
	set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);

	sh->reconstruct_state = reconstruct_state_drain_result;
	set_bit(STRIPE_HANDLE, &sh->state);
	release_stripe(sh);
}

static void
ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
		unsigned long pending)
		unsigned long ops_request)
{
	/* kernel stack size limits the total number of disks */
	int disks = sh->disks;
@@ -780,7 +742,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,

	int count = 0, pd_idx = sh->pd_idx, i;
	struct page *xor_dest;
	int prexor = test_bit(STRIPE_OP_PREXOR, &pending);
	int prexor = test_bit(STRIPE_OP_PREXOR, &ops_request);
	unsigned long flags;
	dma_async_tx_callback callback;

@@ -807,7 +769,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
	}

	/* check whether this postxor is part of a write */
	callback = test_bit(STRIPE_OP_BIODRAIN, &pending) ?
	callback = test_bit(STRIPE_OP_BIODRAIN, &ops_request) ?
		ops_complete_write : ops_complete_postxor;

	/* 1/ if we prexor'd then the dest is reused as a source
@@ -868,8 +830,7 @@ static void ops_run_check(struct stripe_head *sh)
		ops_complete_check, sh);
}

static void raid5_run_ops(struct stripe_head *sh, unsigned long pending,
			  unsigned long ops_request)
static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request)
{
	int overlap_clear = 0, i, disks = sh->disks;
	struct dma_async_tx_descriptor *tx = NULL;
@@ -880,18 +841,18 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long pending,
	}

	if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request))
		tx = ops_run_compute5(sh, pending);
		tx = ops_run_compute5(sh, ops_request);

	if (test_bit(STRIPE_OP_PREXOR, &pending))
	if (test_bit(STRIPE_OP_PREXOR, &ops_request))
		tx = ops_run_prexor(sh, tx);

	if (test_bit(STRIPE_OP_BIODRAIN, &pending)) {
		tx = ops_run_biodrain(sh, tx, pending);
	if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
		tx = ops_run_biodrain(sh, tx, ops_request);
		overlap_clear++;
	}

	if (test_bit(STRIPE_OP_POSTXOR, &pending))
		ops_run_postxor(sh, tx, pending);
	if (test_bit(STRIPE_OP_POSTXOR, &ops_request))
		ops_run_postxor(sh, tx, ops_request);

	if (test_bit(STRIPE_OP_CHECK, &ops_request))
		ops_run_check(sh);
@@ -1684,11 +1645,11 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
	}
}

static int
handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
static void
handle_write_operations5(struct stripe_head *sh, struct stripe_head_state *s,
			 int rcw, int expand)
{
	int i, pd_idx = sh->pd_idx, disks = sh->disks;
	int locked = 0;

	if (rcw) {
		/* if we are not expanding this is a proper write request, and
@@ -1696,12 +1657,12 @@ handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
		 * stripe cache
		 */
		if (!expand) {
			set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
			sh->ops.count++;
		}
			sh->reconstruct_state = reconstruct_state_drain_run;
			set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
		} else
			sh->reconstruct_state = reconstruct_state_run;

		set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
		sh->ops.count++;
		set_bit(STRIPE_OP_POSTXOR, &s->ops_request);

		for (i = disks; i--; ) {
			struct r5dev *dev = &sh->dev[i];
@@ -1710,21 +1671,20 @@ handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
				set_bit(R5_LOCKED, &dev->flags);
				if (!expand)
					clear_bit(R5_UPTODATE, &dev->flags);
				locked++;
				s->locked++;
			}
		}
		if (locked + 1 == disks)
		if (s->locked + 1 == disks)
			if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
				atomic_inc(&sh->raid_conf->pending_full_writes);
	} else {
		BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
			test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));

		set_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
		set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
		set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);

		sh->ops.count += 3;
		sh->reconstruct_state = reconstruct_state_drain_run;
		set_bit(STRIPE_OP_PREXOR, &s->ops_request);
		set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
		set_bit(STRIPE_OP_POSTXOR, &s->ops_request);

		for (i = disks; i--; ) {
			struct r5dev *dev = &sh->dev[i];
@@ -1742,7 +1702,7 @@ handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
				set_bit(R5_Wantprexor, &dev->flags);
				set_bit(R5_LOCKED, &dev->flags);
				clear_bit(R5_UPTODATE, &dev->flags);
				locked++;
				s->locked++;
			}
		}
	}
@@ -1752,13 +1712,11 @@ handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
	 */
	set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
	clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
	locked++;
	s->locked++;

	pr_debug("%s: stripe %llu locked: %d pending: %lx\n",
	pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n",
		__func__, (unsigned long long)sh->sector,
		locked, sh->ops.pending);

	return locked;
		s->locked, s->ops_request);
}

/*
@@ -2005,8 +1963,7 @@ static void handle_issuing_new_read_requests5(struct stripe_head *sh,
	 * midst of changing due to a write
	 */
	if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
	    !test_bit(STRIPE_OP_PREXOR, &sh->ops.pending) &&
	    !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
	    !sh->reconstruct_state) {
		for (i = disks; i--; )
			if (__handle_issuing_new_read_requests5(
				sh, s, i, disks) == 0)
@@ -2211,7 +2168,7 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
	if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
	    (s->locked == 0 && (rcw == 0 || rmw == 0) &&
	    !test_bit(STRIPE_BIT_DELAY, &sh->state)))
		s->locked += handle_write_operations5(sh, rcw == 0, 0);
		handle_write_operations5(sh, s, rcw == 0, 0);
}

static void handle_issuing_new_write_requests6(raid5_conf_t *conf,
@@ -2581,15 +2538,14 @@ static void handle_stripe5(struct stripe_head *sh)
	struct bio *return_bi = NULL;
	struct stripe_head_state s;
	struct r5dev *dev;
	unsigned long pending = 0;
	mdk_rdev_t *blocked_rdev = NULL;
	int prexor;

	memset(&s, 0, sizeof(s));
	pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d "
		"ops=%lx:%lx:%lx\n", (unsigned long long)sh->sector, sh->state,
		atomic_read(&sh->count), sh->pd_idx,
		sh->ops.pending, sh->ops.ack, sh->ops.complete);
	pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d check:%d "
		 "reconstruct:%d\n", (unsigned long long)sh->sector, sh->state,
		 atomic_read(&sh->count), sh->pd_idx, sh->check_state,
		 sh->reconstruct_state);

	spin_lock(&sh->lock);
	clear_bit(STRIPE_HANDLE, &sh->state);
@@ -2703,34 +2659,12 @@ static void handle_stripe5(struct stripe_head *sh)
	/* Now we check to see if any write operations have recently
	 * completed
	 */

	/* leave prexor set until postxor is done, allows us to distinguish
	 * a rmw from a rcw during biodrain
	 */
	prexor = 0;
	if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) &&
		test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {

		prexor = 1;
		clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
		clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack);
		clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending);

	if (sh->reconstruct_state == reconstruct_state_drain_result) {
		sh->reconstruct_state = reconstruct_state_idle;
		for (i = disks; i--; )
			clear_bit(R5_Wantprexor, &sh->dev[i].flags);
	}

	/* if only POSTXOR is set then this is an 'expand' postxor */
	if (test_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete) &&
		test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {

		clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete);
		clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.ack);
		clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);

		clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
		clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack);
		clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
			prexor += test_and_clear_bit(R5_Wantprexor,
						     &sh->dev[i].flags);

		/* All the 'written' buffers and the parity block are ready to
		 * be written back to disk
@@ -2763,8 +2697,7 @@ static void handle_stripe5(struct stripe_head *sh)
	 * 2/ A 'check' operation is in flight, as it may clobber the parity
	 *    block.
	 */
	if (s.to_write && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending) &&
	    !sh->check_state)
	if (s.to_write && !sh->reconstruct_state && !sh->check_state)
		handle_issuing_new_write_requests5(conf, sh, &s, disks);

	/* maybe we need to check and possibly fix the parity for this stripe
@@ -2805,18 +2738,10 @@ static void handle_stripe5(struct stripe_head *sh)
		}
	}

	/* Finish postxor operations initiated by the expansion
	 * process
	 */
	if (test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete) &&
		!test_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending)) {

	/* Finish reconstruct operations initiated by the expansion process */
	if (sh->reconstruct_state == reconstruct_state_result) {
		sh->reconstruct_state = reconstruct_state_idle;
		clear_bit(STRIPE_EXPANDING, &sh->state);

		clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
		clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack);
		clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);

		for (i = conf->raid_disks; i--; )
			set_bit(R5_Wantwrite, &sh->dev[i].flags);
			set_bit(R5_LOCKED, &dev->flags);
@@ -2824,15 +2749,13 @@ static void handle_stripe5(struct stripe_head *sh)
	}

	if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
		!test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
	    !sh->reconstruct_state) {
		/* Need to write out all blocks after computing parity */
		sh->disks = conf->raid_disks;
		sh->pd_idx = stripe_to_pdidx(sh->sector, conf,
			conf->raid_disks);
		s.locked += handle_write_operations5(sh, 1, 1);
	} else if (s.expanded &&
		   s.locked == 0 &&
		!test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
		handle_write_operations5(sh, &s, 1, 1);
	} else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
		clear_bit(STRIPE_EXPAND_READY, &sh->state);
		atomic_dec(&conf->reshape_stripes);
		wake_up(&conf->wait_for_overlap);
@@ -2843,9 +2766,6 @@ static void handle_stripe5(struct stripe_head *sh)
	    !test_bit(STRIPE_COMPUTE_RUN, &sh->state))
		handle_stripe_expansion(conf, sh, NULL);

	if (sh->ops.count)
		pending = get_stripe_work(sh);

 unlock:
	spin_unlock(&sh->lock);

@@ -2853,8 +2773,8 @@ static void handle_stripe5(struct stripe_head *sh)
	if (unlikely(blocked_rdev))
		md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);

	if (pending || s.ops_request)
		raid5_run_ops(sh, pending, s.ops_request);
	if (s.ops_request)
		raid5_run_ops(sh, s.ops_request);

	ops_run_io(sh, &s);

+1 −8
Original line number Diff line number Diff line
@@ -205,19 +205,12 @@ struct stripe_head {
	int			bm_seq;	/* sequence number for bitmap flushes */
	int			disks;			/* disks in stripe */
	enum check_states	check_state;
	enum reconstruct_states reconstruct_state;
	/* stripe_operations
	 * @pending - pending ops flags (set for request->issue->complete)
	 * @ack - submitted ops flags (set for issue->complete)
	 * @complete - completed ops flags (set for complete)
	 * @target - STRIPE_OP_COMPUTE_BLK target
	 * @count - raid5_runs_ops is set to run when this is non-zero
	 */
	struct stripe_operations {
		unsigned long	   pending;
		unsigned long	   ack;
		unsigned long	   complete;
		int		   target;
		int		   count;
		u32		   zero_sum_result;
	} ops;
	struct r5dev {