Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 417b8d4a authored by Dan Williams's avatar Dan Williams Committed by NeilBrown
Browse files

md/raid456: downlevel multicore operations to raid_run_ops



The percpu conversion allowed a straightforward handoff of stripe
processing to the async subsystem that initially showed some modest gains
(+4%).  However, this model is too simplistic and leads to stripes
bouncing between raid5d and the async thread pool for every invocation
of handle_stripe().  As reported by Holger this can fall into a
pathological situation severely impacting throughput (6x performance
loss).

By downleveling the parallelism to raid_run_ops the pathological
stripe_head bouncing is eliminated.  This version still exhibits an
average 11% throughput loss for:

	mdadm --create /dev/md0 /dev/sd[b-q] -n 16 -l 6
	echo 1024 > /sys/block/md0/md/stripe_cache_size
	dd if=/dev/zero of=/dev/md0 bs=1024k count=2048

...but the results are at least stable and can be used as a base for
further multicore experimentation.

Reported-by: Holger Kiehl <Holger.Kiehl@dwd.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: NeilBrown <neilb@suse.de>
parent dce3a7a4
Loading
Loading
Loading
Loading
+40 −35
Original line number Diff line number Diff line
@@ -1139,7 +1139,7 @@ static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu
			   &sh->ops.zero_sum_result, percpu->spare_page, &submit);
}

static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
static void __raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
{
	int overlap_clear = 0, i, disks = sh->disks;
	struct dma_async_tx_descriptor *tx = NULL;
@@ -1204,6 +1204,36 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
	put_cpu();
}

#ifdef CONFIG_MULTICORE_RAID456
/* Async-pool side of raid_run_ops(): de-stage the request flags, release
 * the per-stripe request slot, then execute the stripe operations.
 */
static void async_run_ops(void *param, async_cookie_t cookie)
{
	struct stripe_head *sh = param;
	unsigned long ops_request = sh->ops.request;

	/* ops_request has been copied out above, so the single staging
	 * slot can be released before the (potentially long-running)
	 * operations execute; a waiter in raid_run_ops() may then stage
	 * the next request.
	 */
	clear_bit_unlock(STRIPE_OPS_REQ_PENDING, &sh->state);
	wake_up(&sh->ops.wait_for_ops);

	__raid_run_ops(sh, ops_request);
	release_stripe(sh); /* drop the reference taken in raid_run_ops() */
}

/* Multicore front-end for __raid_run_ops(): stage the request flags on the
 * stripe and hand execution off to the async thread pool.
 */
static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
{
	/* since handle_stripe can be called outside of raid5d context
	 * we need to ensure sh->ops.request is de-staged before another
	 * request arrives
	 */
	wait_event(sh->ops.wait_for_ops,
		   !test_and_set_bit_lock(STRIPE_OPS_REQ_PENDING, &sh->state));
	sh->ops.request = ops_request;

	/* hold a stripe reference across the async handoff; it is dropped
	 * by async_run_ops()
	 */
	atomic_inc(&sh->count);
	async_schedule(async_run_ops, sh);
}
#else
/* !CONFIG_MULTICORE_RAID456: run the operations synchronously */
#define raid_run_ops __raid_run_ops
#endif

static int grow_one_stripe(raid5_conf_t *conf)
{
	struct stripe_head *sh;
@@ -1213,6 +1243,9 @@ static int grow_one_stripe(raid5_conf_t *conf)
	memset(sh, 0, sizeof(*sh) + (conf->raid_disks-1)*sizeof(struct r5dev));
	sh->raid_conf = conf;
	spin_lock_init(&sh->lock);
	#ifdef CONFIG_MULTICORE_RAID456
	init_waitqueue_head(&sh->ops.wait_for_ops);
	#endif

	if (grow_buffers(sh, conf->raid_disks)) {
		shrink_buffers(sh, conf->raid_disks);
@@ -1329,6 +1362,9 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)

		nsh->raid_conf = conf;
		spin_lock_init(&nsh->lock);
		#ifdef CONFIG_MULTICORE_RAID456
		init_waitqueue_head(&nsh->ops.wait_for_ops);
		#endif

		list_add(&nsh->lru, &newstripes);
	}
@@ -4342,37 +4378,6 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
	return handled;
}

#ifdef CONFIG_MULTICORE_RAID456
/* Async-pool worker: handle one stripe, then drop the reference held
 * across the handoff from process_stripe().
 */
static void __process_stripe(void *param, async_cookie_t cookie)
{
	struct stripe_head *sh = param;

	handle_stripe(sh);
	release_stripe(sh);
}

/* Queue stripe handling onto the async thread pool within @domain */
static void process_stripe(struct stripe_head *sh, struct list_head *domain)
{
	async_schedule_domain(__process_stripe, sh, domain);
}

/* Wait for all stripe work scheduled in @domain to complete */
static void synchronize_stripe_processing(struct list_head *domain)
{
	async_synchronize_full_domain(domain);
}
#else
/* Synchronous fallback: handle the stripe inline in the caller's context;
 * @domain is unused.
 */
static void process_stripe(struct stripe_head *sh, struct list_head *domain)
{
	handle_stripe(sh);
	release_stripe(sh);
	cond_resched();
}

/* Nothing outstanding to wait for in the synchronous case */
static void synchronize_stripe_processing(struct list_head *domain)
{
}
#endif


/*
 * This is our raid5 kernel thread.
@@ -4386,7 +4391,6 @@ static void raid5d(mddev_t *mddev)
	struct stripe_head *sh;
	raid5_conf_t *conf = mddev->private;
	int handled;
	LIST_HEAD(raid_domain);

	pr_debug("+++ raid5d active\n");

@@ -4423,7 +4427,9 @@ static void raid5d(mddev_t *mddev)
		spin_unlock_irq(&conf->device_lock);
		
		handled++;
		process_stripe(sh, &raid_domain);
		handle_stripe(sh);
		release_stripe(sh);
		cond_resched();

		spin_lock_irq(&conf->device_lock);
	}
@@ -4431,7 +4437,6 @@ static void raid5d(mddev_t *mddev)

	spin_unlock_irq(&conf->device_lock);

	synchronize_stripe_processing(&raid_domain);
	async_tx_issue_pending_all();
	unplug_slaves(mddev);

+11 −1
Original line number Diff line number Diff line
@@ -214,12 +214,20 @@ struct stripe_head {
	int			disks;		/* disks in stripe */
	enum check_states	check_state;
	enum reconstruct_states reconstruct_state;
	/* stripe_operations
	/**
	 * struct stripe_operations
	 * @target - STRIPE_OP_COMPUTE_BLK target
	 * @target2 - 2nd compute target in the raid6 case
	 * @zero_sum_result - P and Q verification flags
	 * @request - async service request flags for raid_run_ops
	 */
	struct stripe_operations {
		int 		     target, target2;
		enum sum_check_flags zero_sum_result;
		#ifdef CONFIG_MULTICORE_RAID456
		unsigned long	     request;
		wait_queue_head_t    wait_for_ops;
		#endif
	} ops;
	struct r5dev {
		struct bio	req;
@@ -294,6 +302,8 @@ struct r6_state {
#define	STRIPE_FULL_WRITE	13 /* all blocks are set to be overwritten */
#define	STRIPE_BIOFILL_RUN	14
#define	STRIPE_COMPUTE_RUN	15
#define	STRIPE_OPS_REQ_PENDING	16

/*
 * Operation request flags
 */