
Commit 36d1c647 authored by Dan Williams

md/raid6: move the spare page to a percpu allocation



In preparation for asynchronous handling of raid6 operations, move the
spare page to a per-cpu allocation so that multiple synchronous raid6
recovery operations can run simultaneously, one per CPU.

Make the allocation CPU-hotplug aware so that spare pages are only
allocated for CPUs that are actually present, rather than for every
possible CPU.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>

parent a11034b4
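Before the diff itself, here is the core access pattern the patch introduces in handle_parity_checks6(), distilled into a minimal sketch. The helpers (get_cpu(), per_cpu_ptr(), put_cpu()) and the conf->percpu / spare_page fields come straight from the diff below; the wrapper function name is purely illustrative and does not exist in the patch.

	#include <linux/percpu.h>	/* alloc_percpu(), per_cpu_ptr() */
	#include <linux/smp.h>		/* get_cpu(), put_cpu() */
	#include "raid5.h"		/* raid5_conf_t, struct raid5_percpu */

	/* Illustrative wrapper; in the patch this logic sits inline in
	 * handle_parity_checks6().
	 */
	static void check_pq_with_spare_page(raid5_conf_t *conf)
	{
		unsigned long cpu;
		struct page *tmp_page;

		cpu = get_cpu();	/* disables preemption, pins us to this CPU */
		tmp_page = per_cpu_ptr(conf->percpu, cpu)->spare_page;

		/* ... synchronous raid6 P/Q check/recovery using tmp_page ... */

		put_cpu();		/* re-enables preemption */
	}

Because the page is per-CPU and its use is bracketed by get_cpu()/put_cpu(), each CPU can run a recovery operation concurrently without any locking around the spare page.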
drivers/md/raid5.c  +168 −84
@@ -48,6 +48,7 @@
 #include <linux/raid/pq.h>
 #include <linux/async_tx.h>
 #include <linux/seq_file.h>
+#include <linux/cpu.h>
 #include "md.h"
 #include "raid5.h"
 #include "bitmap.h"
@@ -2566,13 +2567,14 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
 
 static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
 				  struct stripe_head_state *s,
-				struct r6_state *r6s, struct page *tmp_page,
-				int disks)
+				  struct r6_state *r6s, int disks)
 {
 	int update_p = 0, update_q = 0;
 	struct r5dev *dev;
 	int pd_idx = sh->pd_idx;
 	int qd_idx = sh->qd_idx;
+	unsigned long cpu;
+	struct page *tmp_page;
 
 	set_bit(STRIPE_HANDLE, &sh->state);
 
@@ -2583,12 +2585,8 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
 	 * case we can only check one of them, possibly using the
 	 * other to generate missing data
 	 */
-
-	/* If !tmp_page, we cannot do the calculations,
-	 * but as we have set STRIPE_HANDLE, we will soon be called
-	 * by stripe_handle with a tmp_page - just wait until then.
-	 */
-	if (tmp_page) {
+	cpu = get_cpu();
+	tmp_page = per_cpu_ptr(conf->percpu, cpu)->spare_page;
 	if (s->failed == r6s->q_failed) {
 		/* The only possible failed device holds 'Q', so it
 		 * makes sense to check P (If anything else were failed,
@@ -2615,6 +2613,8 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
 			update_q = 1;
 		}
 	}
+	put_cpu();
+
 	if (update_p || update_q) {
 		conf->mddev->resync_mismatches += STRIPE_SECTORS;
 		if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
@@ -2655,7 +2655,6 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
 
 	set_bit(STRIPE_INSYNC, &sh->state);
 }
-}
 
 static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
 				struct r6_state *r6s)
@@ -3009,7 +3008,7 @@ static bool handle_stripe5(struct stripe_head *sh)
 	return blocked_rdev == NULL;
 }
 
-static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
+static bool handle_stripe6(struct stripe_head *sh)
 {
 	raid5_conf_t *conf = sh->raid_conf;
 	int disks = sh->disks;
@@ -3164,7 +3163,7 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 	 * data is available
 	 */
 	if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state))
-		handle_parity_checks6(conf, sh, &s, &r6s, tmp_page, disks);
+		handle_parity_checks6(conf, sh, &s, &r6s, disks);
 
 	if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
 		md_done_sync(conf->mddev, STRIPE_SECTORS,1);
@@ -3247,16 +3246,14 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 }
 
 /* returns true if the stripe was handled */
-static bool handle_stripe(struct stripe_head *sh, struct page *tmp_page)
+static bool handle_stripe(struct stripe_head *sh)
 {
 	if (sh->raid_conf->level == 6)
-		return handle_stripe6(sh, tmp_page);
+		return handle_stripe6(sh);
 	else
 		return handle_stripe5(sh);
 }
 
-
-
 static void raid5_activate_delayed(raid5_conf_t *conf)
 {
 	if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) {
@@ -4047,7 +4044,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
 	spin_unlock(&sh->lock);
 
 	/* wait for any blocked device to be handled */
-	while(unlikely(!handle_stripe(sh, NULL)))
+	while (unlikely(!handle_stripe(sh)))
 		;
 	release_stripe(sh);
 
@@ -4104,7 +4101,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
 			return handled;
 		}
 
-		handle_stripe(sh, NULL);
+		handle_stripe(sh);
 		release_stripe(sh);
 		handled++;
 	}
@@ -4168,7 +4165,7 @@ static void raid5d(mddev_t *mddev)
 		spin_unlock_irq(&conf->device_lock);
 
 		handled++;
-		handle_stripe(sh, conf->spare_page);
+		handle_stripe(sh);
 		release_stripe(sh);
 
 		spin_lock_irq(&conf->device_lock);
@@ -4309,15 +4306,104 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks)
 	return sectors * (raid_disks - conf->max_degraded);
 }
 
+static void raid5_free_percpu(raid5_conf_t *conf)
+{
+	struct raid5_percpu *percpu;
+	unsigned long cpu;
+
+	if (!conf->percpu)
+		return;
+
+	get_online_cpus();
+	for_each_possible_cpu(cpu) {
+		percpu = per_cpu_ptr(conf->percpu, cpu);
+		safe_put_page(percpu->spare_page);
+	}
+#ifdef CONFIG_HOTPLUG_CPU
+	unregister_cpu_notifier(&conf->cpu_notify);
+#endif
+	put_online_cpus();
+
+	free_percpu(conf->percpu);
+}
+
 static void free_conf(raid5_conf_t *conf)
 {
 	shrink_stripes(conf);
-	safe_put_page(conf->spare_page);
+	raid5_free_percpu(conf);
 	kfree(conf->disks);
 	kfree(conf->stripe_hashtbl);
 	kfree(conf);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
+			      void *hcpu)
+{
+	raid5_conf_t *conf = container_of(nfb, raid5_conf_t, cpu_notify);
+	long cpu = (long)hcpu;
+	struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu);
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		if (!percpu->spare_page)
+			percpu->spare_page = alloc_page(GFP_KERNEL);
+		if (!percpu->spare_page) {
+			pr_err("%s: failed memory allocation for cpu%ld\n",
+			       __func__, cpu);
+			return NOTIFY_BAD;
+		}
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		safe_put_page(percpu->spare_page);
+		percpu->spare_page = NULL;
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+#endif
+
+static int raid5_alloc_percpu(raid5_conf_t *conf)
+{
+	unsigned long cpu;
+	struct page *spare_page;
+	struct raid5_percpu *allcpus;
+	int err;
+
+	/* the only percpu data is the raid6 spare page */
+	if (conf->level != 6)
+		return 0;
+
+	allcpus = alloc_percpu(struct raid5_percpu);
+	if (!allcpus)
+		return -ENOMEM;
+	conf->percpu = allcpus;
+
+	get_online_cpus();
+	err = 0;
+	for_each_present_cpu(cpu) {
+		spare_page = alloc_page(GFP_KERNEL);
+		if (!spare_page) {
+			err = -ENOMEM;
+			break;
+		}
+		per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page;
+	}
+#ifdef CONFIG_HOTPLUG_CPU
+	conf->cpu_notify.notifier_call = raid456_cpu_notify;
+	conf->cpu_notify.priority = 0;
+	if (err == 0)
+		err = register_cpu_notifier(&conf->cpu_notify);
+#endif
+	put_online_cpus();
+
+	return err;
+}
+
 static raid5_conf_t *setup_conf(mddev_t *mddev)
 {
 	raid5_conf_t *conf;
@@ -4372,11 +4458,10 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
 	if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
 		goto abort;
 
-	if (mddev->new_level == 6) {
-		conf->spare_page = alloc_page(GFP_KERNEL);
-		if (!conf->spare_page)
-			goto abort;
-	}
+	conf->level = mddev->new_level;
+	if (raid5_alloc_percpu(conf) != 0)
+		goto abort;
+
 	spin_lock_init(&conf->device_lock);
 	init_waitqueue_head(&conf->wait_for_stripe);
 	init_waitqueue_head(&conf->wait_for_overlap);
@@ -4412,7 +4497,6 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
 	}
 
 	conf->chunk_size = mddev->new_chunk;
-	conf->level = mddev->new_level;
 	if (conf->level == 6)
 		conf->max_degraded = 2;
 	else
drivers/md/raid5.h  +7 −2
@@ -383,8 +383,13 @@ struct raid5_private_data {
 					    * (fresh device added).
 					    * Cleared when a sync completes.
 					    */
-
-	struct page	*spare_page; /* Used when checking P/Q in raid6 */
+	/* per cpu variables */
+	struct raid5_percpu {
+		struct page	*spare_page; /* Used when checking P/Q in raid6 */
+	} *percpu;
+#ifdef CONFIG_HOTPLUG_CPU
+	struct notifier_block	cpu_notify;
+#endif
 
 	/*
 	 * Free stripes pool