md/raid5: activate raid6 rmw feature (584acdd4) · Commits · e / devices / android_kernel_fairphone_FP4

crypto/async_tx/async_pq.c

+16 −3

Original line number	Diff line number	Diff line
		@@ -124,6 +124,7 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
		{
		void **srcs;
		int i;
		int start = -1, stop = disks - 3;

		if (submit->scribble)
		srcs = submit->scribble;
		@@ -134,9 +135,20 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
		if (blocks[i] == NULL) {
		BUG_ON(i > disks - 3); /* P or Q can't be zero */
		srcs[i] = (void*)raid6_empty_zero_page;
		} else
		} else {
		srcs[i] = page_address(blocks[i]) + offset;
		if (i < disks - 2) {
		stop = i;
		if (start == -1)
		start = i;
		}
		}
		}
		if (submit->flags & ASYNC_TX_PQ_XOR_DST) {
		BUG_ON(!raid6_call.xor_syndrome);
		if (start >= 0)
		raid6_call.xor_syndrome(disks, start, stop, len, srcs);
		} else
		raid6_call.gen_syndrome(disks, len, srcs);
		async_tx_sync_epilog(submit);
		}
		@@ -178,7 +190,8 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
		if (device)
		unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO);

		if (unmap &&
		/* XORing P/Q is only implemented in software */
		if (unmap && !(submit->flags & ASYNC_TX_PQ_XOR_DST) &&
		(src_cnt <= dma_maxpq(device, 0) ||
		dma_maxpq(device, DMA_PREP_CONTINUE) > 0) &&
		is_dma_pq_aligned(device, offset, 0, len)) {

drivers/md/raid5.c

+78 −26

Original line number	Diff line number	Diff line
		@@ -1317,7 +1317,9 @@ ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
		* destination buffer is recorded in srcs[count] and the Q destination
		* is recorded in srcs[count+1]].
		*/
		static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh)
		static int set_syndrome_sources(struct page **srcs,
		struct stripe_head *sh,
		int srctype)
		{
		int disks = sh->disks;
		int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
		@@ -1332,7 +1334,14 @@ static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh)
		i = d0_idx;
		do {
		int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
		struct r5dev *dev = &sh->dev[i];

		if (i == sh->qd_idx || i == sh->pd_idx ||
		(srctype == SYNDROME_SRC_ALL) ||
		(srctype == SYNDROME_SRC_WANT_DRAIN &&
		test_bit(R5_Wantdrain, &dev->flags)) ||
		(srctype == SYNDROME_SRC_WRITTEN &&
		dev->written))
		srcs[slot] = sh->dev[i].page;
		i = raid6_next_disk(i, disks);
		} while (i != d0_idx);
		@@ -1373,7 +1382,7 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
		atomic_inc(&sh->count);

		if (target == qd_idx) {
		count = set_syndrome_sources(blocks, sh);
		count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_ALL);
		blocks[count] = NULL; /* regenerating p is not necessary */
		BUG_ON(blocks[count+1] != dest); /* q should already be set */
		init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
		@@ -1481,7 +1490,7 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
		tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE,
		&submit);

		count = set_syndrome_sources(blocks, sh);
		count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_ALL);
		init_async_submit(&submit, ASYNC_TX_FENCE, tx,
		ops_complete_compute, sh,
		to_addr_conv(sh, percpu, 0));
		@@ -1515,7 +1524,7 @@ static void ops_complete_prexor(void *stripe_head_ref)
		}

		static struct dma_async_tx_descriptor *
		ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
		ops_run_prexor5(struct stripe_head *sh, struct raid5_percpu *percpu,
		struct dma_async_tx_descriptor *tx)
		{
		int disks = sh->disks;
		@@ -1544,6 +1553,26 @@ ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
		return tx;
		}

		static struct dma_async_tx_descriptor *
		ops_run_prexor6(struct stripe_head sh, struct raid5_percpu percpu,
		struct dma_async_tx_descriptor *tx)
		{
		struct page **blocks = to_addr_page(percpu, 0);
		int count;
		struct async_submit_ctl submit;

		pr_debug("%s: stripe %llu\n", __func__,
		(unsigned long long)sh->sector);

		count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_WANT_DRAIN);

		init_async_submit(&submit, ASYNC_TX_FENCE\|ASYNC_TX_PQ_XOR_DST, tx,
		ops_complete_prexor, sh, to_addr_conv(sh, percpu, 0));
		tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);

		return tx;
		}

		static struct dma_async_tx_descriptor *
		ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
		{
		@@ -1746,6 +1775,8 @@ ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
		int count, i, j = 0;
		struct stripe_head *head_sh = sh;
		int last_stripe;
		int synflags;
		unsigned long txflags;

		pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);

		@@ -1765,14 +1796,23 @@ ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,

		again:
		blocks = to_addr_page(percpu, j);
		count = set_syndrome_sources(blocks, sh);

		if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) {
		synflags = SYNDROME_SRC_WRITTEN;
		txflags = ASYNC_TX_ACK | ASYNC_TX_PQ_XOR_DST;
		} else {
		synflags = SYNDROME_SRC_ALL;
		txflags = ASYNC_TX_ACK;
		}

		count = set_syndrome_sources(blocks, sh, synflags);
		last_stripe = !head_sh->batch_head ||
		list_first_entry(&sh->batch_list,
		struct stripe_head, batch_list) == head_sh;

		if (last_stripe) {
		atomic_inc(&head_sh->count);
		init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct,
		init_async_submit(&submit, txflags, tx, ops_complete_reconstruct,
		head_sh, to_addr_conv(sh, percpu, j));
		} else
		init_async_submit(&submit, 0, tx, NULL, NULL,
		@@ -1843,7 +1883,7 @@ static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu
		(unsigned long long)sh->sector, checkp);

		BUG_ON(sh->batch_head);
		count = set_syndrome_sources(srcs, sh);
		count = set_syndrome_sources(srcs, sh, SYNDROME_SRC_ALL);
		if (!checkp)
		srcs[count] = NULL;

		@@ -1884,8 +1924,12 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
		async_tx_ack(tx);
		}

		if (test_bit(STRIPE_OP_PREXOR, &ops_request))
		tx = ops_run_prexor(sh, percpu, tx);
		if (test_bit(STRIPE_OP_PREXOR, &ops_request)) {
		if (level < 6)
		tx = ops_run_prexor5(sh, percpu, tx);
		else
		tx = ops_run_prexor6(sh, percpu, tx);
		}

		if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
		tx = ops_run_biodrain(sh, tx);
		@@ -2770,7 +2814,7 @@ static void
		schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
		int rcw, int expand)
		{
		int i, pd_idx = sh->pd_idx, disks = sh->disks;
		int i, pd_idx = sh->pd_idx, qd_idx = sh->qd_idx, disks = sh->disks;
		struct r5conf *conf = sh->raid_conf;
		int level = conf->level;

		@@ -2806,13 +2850,15 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
		if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
		atomic_inc(&conf->pending_full_writes);
		} else {
		BUG_ON(level == 6);
		BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
		test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
		BUG_ON(level == 6 &&
		(!(test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags) ||
		test_bit(R5_Wantcompute, &sh->dev[qd_idx].flags))));

		for (i = disks; i--; ) {
		struct r5dev *dev = &sh->dev[i];
		if (i == pd_idx)
		if (i == pd_idx || i == qd_idx)
		continue;

		if (dev->towrite &&
		@@ -3476,28 +3522,27 @@ static void handle_stripe_dirtying(struct r5conf *conf,
		int rmw = 0, rcw = 0, i;
		sector_t recovery_cp = conf->mddev->recovery_cp;

		/* RAID6 requires 'rcw' in current implementation.
		* Otherwise, check whether resync is now happening or should start.
		/* Check whether resync is now happening or should start.
		* If yes, then the array is dirty (after unclean shutdown or
		* initial creation), so parity in some stripes might be inconsistent.
		* In this case, we need to always do reconstruct-write, to ensure
		* that in case of drive failure or read-error correction, we
		* generate correct data from the parity.
		*/
		if (conf->max_degraded == 2 ||
		if (conf->rmw_level == PARITY_DISABLE_RMW ||
		(recovery_cp < MaxSector && sh->sector >= recovery_cp &&
		s->failed == 0)) {
		/* Calculate the real rcw later - for now make it
		* look like rcw is cheaper
		*/
		rcw = 1; rmw = 2;
		pr_debug("force RCW max_degraded=%u, recovery_cp=%llu sh->sector=%llu\n",
		conf->max_degraded, (unsigned long long)recovery_cp,
		pr_debug("force RCW rmw_level=%u, recovery_cp=%llu sh->sector=%llu\n",
		conf->rmw_level, (unsigned long long)recovery_cp,
		(unsigned long long)sh->sector);
		} else for (i = disks; i--; ) {
		/* would I have to read this buffer for read_modify_write */
		struct r5dev *dev = &sh->dev[i];
		if ((dev->towrite || i == sh->pd_idx) &&
		if ((dev->towrite || i == sh->pd_idx || i == sh->qd_idx) &&
		!test_bit(R5_LOCKED, &dev->flags) &&
		!(test_bit(R5_UPTODATE, &dev->flags) ||
		test_bit(R5_Wantcompute, &dev->flags))) {
		@@ -3507,7 +3552,8 @@ static void handle_stripe_dirtying(struct r5conf *conf,
		rmw += 2*disks; /* cannot read it */
		}
		/* Would I have to read this buffer for reconstruct_write */
		if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx &&
		if (!test_bit(R5_OVERWRITE, &dev->flags) &&
		i != sh->pd_idx && i != sh->qd_idx &&
		!test_bit(R5_LOCKED, &dev->flags) &&
		!(test_bit(R5_UPTODATE, &dev->flags) ||
		test_bit(R5_Wantcompute, &dev->flags))) {
		@@ -3520,7 +3566,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
		pr_debug("for sector %llu, rmw=%d rcw=%d\n",
		(unsigned long long)sh->sector, rmw, rcw);
		set_bit(STRIPE_HANDLE, &sh->state);
		if (rmw < rcw && rmw > 0) {
		if ((rmw < rcw || (rmw == rcw && conf->rmw_level == PARITY_ENABLE_RMW)) && rmw > 0) {
		/* prefer read-modify-write, but need to get some data */
		if (conf->mddev->queue)
		blk_add_trace_msg(conf->mddev->queue,
		@@ -3528,7 +3574,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
		(unsigned long long)sh->sector, rmw);
		for (i = disks; i--; ) {
		struct r5dev *dev = &sh->dev[i];
		if ((dev->towrite || i == sh->pd_idx) &&
		if ((dev->towrite || i == sh->pd_idx || i == sh->qd_idx) &&
		!test_bit(R5_LOCKED, &dev->flags) &&
		!(test_bit(R5_UPTODATE, &dev->flags) ||
		test_bit(R5_Wantcompute, &dev->flags)) &&
		@@ -3547,7 +3593,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
		}
		}
		}
		if (rcw <= rmw && rcw > 0) {
		if ((rcw < rmw || (rcw == rmw && conf->rmw_level != PARITY_ENABLE_RMW)) && rcw > 0) {
		/* want reconstruct write, but need to get some data */
		int qread =0;
		rcw = 0;
		@@ -6344,10 +6390,16 @@ static struct r5conf *setup_conf(struct mddev *mddev)
		}

		conf->level = mddev->new_level;
		if (conf->level == 6)
		if (conf->level == 6) {
		conf->max_degraded = 2;
		if (raid6_call.xor_syndrome)
		conf->rmw_level = PARITY_ENABLE_RMW;
		else
		conf->rmw_level = PARITY_DISABLE_RMW;
		} else {
		conf->max_degraded = 1;
		conf->rmw_level = PARITY_ENABLE_RMW;
		}
		conf->algorithm = mddev->new_layout;
		conf->reshape_progress = mddev->reshape_position;
		if (conf->reshape_progress != MaxSector) {

drivers/md/raid5.h

+18 −1

Original line number	Diff line number	Diff line
		@@ -355,6 +355,23 @@ enum {
		STRIPE_OP_RECONSTRUCT,
		STRIPE_OP_CHECK,
		};

		/*
		* RAID parity calculation preferences
		*
		* Values stored in r5conf.rmw_level and consulted by
		* handle_stripe_dirtying() when choosing between read-modify-write
		* (rmw) and reconstruct-write (rcw).
		*/
		enum {
		/* rmw is never chosen: handle_stripe_dirtying() forces rcw.
		* setup_conf() selects this for RAID6 when raid6_call.xor_syndrome
		* is not provided.
		*/
		PARITY_DISABLE_RMW = 0,
		/* rmw is allowed; when rmw and rcw cost the same, rmw is preferred. */
		PARITY_ENABLE_RMW,
		};

		/*
		* Pages requested from set_syndrome_sources()
		*
		* Passed as the 'srctype' argument. The P and Q pages are always
		* included; the value only selects which other device pages are
		* placed in the source list.
		*/
		enum {
		/* every device page in the stripe */
		SYNDROME_SRC_ALL,
		/* only devices with R5_Wantdrain set (used by ops_run_prexor6()) */
		SYNDROME_SRC_WANT_DRAIN,
		/* only devices with a non-NULL dev->written (used by
		* ops_run_reconstruct6() in the prexor_drain_run state)
		*/
		SYNDROME_SRC_WRITTEN,
		};
		/*
		* Plugging:
		*
		@@ -411,7 +428,7 @@ struct r5conf {
		spinlock_t hash_locks[NR_STRIPE_HASH_LOCKS];
		struct mddev *mddev;
		int chunk_sectors;
		int level, algorithm;
		int level, algorithm, rmw_level;
		int max_degraded;
		int raid_disks;
		int max_nr_stripes;

include/linux/async_tx.h

+3 −0

Original line number	Diff line number	Diff line
		@@ -60,12 +60,15 @@ struct dma_chan_ref {
		* dependency chain
		* @ASYNC_TX_FENCE: specify that the next operation in the dependency
		* chain uses this operation's result as an input
		* @ASYNC_TX_PQ_XOR_DST: do not overwrite the syndrome but XOR it with the
		* input data. Required for rmw case.
		*/
		enum async_tx_flags {
		ASYNC_TX_XOR_ZERO_DST = (1 << 0),
		ASYNC_TX_XOR_DROP_DST = (1 << 1),
		ASYNC_TX_ACK = (1 << 2),
		ASYNC_TX_FENCE = (1 << 3),
		/* XOR into the existing P/Q instead of overwriting; async_gen_syndrome()
		* only handles this flag on its software (synchronous) path.
		*/
		ASYNC_TX_PQ_XOR_DST = (1 << 4),
		};

		/**