Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ac322de6 authored by Linus Torvalds
Browse files

Merge tag 'md/4.4' of git://neil.brown.name/md

Pull md updates from Neil Brown:
 "Two major components to this update.

   1) The clustered-raid1 support from SUSE is nearly complete.  There
      are a few outstanding issues being worked on.  Maybe half a dozen
      patches will bring this to a usable state.

   2) The first stage of journalled-raid5 support from Facebook makes an
      appearance.  With a journal device configured (typically NVRAM or
      SSD), the "RAID5 write hole" should be closed - a crash during
      degraded operations cannot result in data corruption.

      The next stage will be to use the journal as a write-behind cache
      so that latency can be reduced and in some cases throughput
      increased by performing more full-stripe writes.

* tag 'md/4.4' of git://neil.brown.name/md: (66 commits)
  MD: when RAID journal is missing/faulty, block RESTART_ARRAY_RW
  MD: set journal disk ->raid_disk
  MD: kick out journal disk if it's not fresh
  raid5-cache: start raid5 readonly if journal is missing
  MD: add new bit to indicate raid array with journal
  raid5-cache: IO error handling
  raid5: journal disk can't be removed
  raid5-cache: add trim support for log
  MD: fix info output for journal disk
  raid5-cache: use bio chaining
  raid5-cache: small log->seq cleanup
  raid5-cache: new helper: r5_reserve_log_entry
  raid5-cache: inline r5l_alloc_io_unit into r5l_new_meta
  raid5-cache: take rdev->data_offset into account early on
  raid5-cache: refactor bio allocation
  raid5-cache: clean up r5l_get_meta
  raid5-cache: simplify state machine when caches flushes are not needed
  raid5-cache: factor out a helper to run all stripes for an I/O unit
  raid5-cache: rename flushed_ios to finished_ios
  raid5-cache: free I/O units earlier
  ...
parents ccf21b69 339421de
Loading
Loading
Loading
Loading
+1 −1
Original line number Original line Diff line number Diff line
@@ -17,7 +17,7 @@ dm-cache-smq-y += dm-cache-policy-smq.o
dm-cache-cleaner-y += dm-cache-policy-cleaner.o
dm-cache-cleaner-y += dm-cache-policy-cleaner.o
dm-era-y	+= dm-era-target.o
dm-era-y	+= dm-era-target.o
md-mod-y	+= md.o bitmap.o
md-mod-y	+= md.o bitmap.o
raid456-y	+= raid5.o
raid456-y	+= raid5.o raid5-cache.o


# Note: link order is important.  All raid personalities
# Note: link order is important.  All raid personalities
# and must come before md.o, as they each initialise 
# and must come before md.o, as they each initialise 
+6 −8
Original line number Original line Diff line number Diff line
@@ -613,12 +613,10 @@ static int bitmap_read_sb(struct bitmap *bitmap)
	daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
	daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
	write_behind = le32_to_cpu(sb->write_behind);
	write_behind = le32_to_cpu(sb->write_behind);
	sectors_reserved = le32_to_cpu(sb->sectors_reserved);
	sectors_reserved = le32_to_cpu(sb->sectors_reserved);
	/* XXX: This is a hack to ensure that we don't use clustering
	/* Setup nodes/clustername only if bitmap version is
	 *  in case:
	 * cluster-compatible
	 *	- dm-raid is in use and
	 *	- the nodes written in bitmap_sb is erroneous.
	 */
	 */
	if (!bitmap->mddev->sync_super) {
	if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
		nodes = le32_to_cpu(sb->nodes);
		nodes = le32_to_cpu(sb->nodes);
		strlcpy(bitmap->mddev->bitmap_info.cluster_name,
		strlcpy(bitmap->mddev->bitmap_info.cluster_name,
				sb->cluster_name, 64);
				sb->cluster_name, 64);
@@ -628,7 +626,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
	if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
	if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
		reason = "bad magic";
		reason = "bad magic";
	else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
	else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
		 le32_to_cpu(sb->version) > BITMAP_MAJOR_HI)
		 le32_to_cpu(sb->version) > BITMAP_MAJOR_CLUSTERED)
		reason = "unrecognized superblock version";
		reason = "unrecognized superblock version";
	else if (chunksize < 512)
	else if (chunksize < 512)
		reason = "bitmap chunksize too small";
		reason = "bitmap chunksize too small";
@@ -1572,7 +1570,7 @@ void bitmap_close_sync(struct bitmap *bitmap)
}
}
EXPORT_SYMBOL(bitmap_close_sync);
EXPORT_SYMBOL(bitmap_close_sync);


void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force)
{
{
	sector_t s = 0;
	sector_t s = 0;
	sector_t blocks;
	sector_t blocks;
@@ -1583,7 +1581,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
		bitmap->last_end_sync = jiffies;
		bitmap->last_end_sync = jiffies;
		return;
		return;
	}
	}
	if (time_before(jiffies, (bitmap->last_end_sync
	if (!force && time_before(jiffies, (bitmap->last_end_sync
				  + bitmap->mddev->bitmap_info.daemon_sleep)))
				  + bitmap->mddev->bitmap_info.daemon_sleep)))
		return;
		return;
	wait_event(bitmap->mddev->recovery_wait,
	wait_event(bitmap->mddev->recovery_wait,
+3 −1
Original line number Original line Diff line number Diff line
@@ -9,8 +9,10 @@
#define BITMAP_MAJOR_LO 3
#define BITMAP_MAJOR_LO 3
/* version 4 insists the bitmap is in little-endian order
/* version 4 insists the bitmap is in little-endian order
 * with version 3, it is host-endian which is non-portable
 * with version 3, it is host-endian which is non-portable
 * Version 5 is currently set only for clustered devices
 */
 */
#define BITMAP_MAJOR_HI 4
#define BITMAP_MAJOR_HI 4
#define BITMAP_MAJOR_CLUSTERED 5
#define	BITMAP_MAJOR_HOSTENDIAN 3
#define	BITMAP_MAJOR_HOSTENDIAN 3


/*
/*
@@ -255,7 +257,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded);
int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int degraded);
void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted);
void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted);
void bitmap_close_sync(struct bitmap *bitmap);
void bitmap_close_sync(struct bitmap *bitmap);
void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector);
void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force);


void bitmap_unplug(struct bitmap *bitmap);
void bitmap_unplug(struct bitmap *bitmap);
void bitmap_daemon_work(struct mddev *mddev);
void bitmap_daemon_work(struct mddev *mddev);
+129 −99
Original line number Original line Diff line number Diff line
@@ -28,6 +28,7 @@ struct dlm_lock_resource {
	struct completion completion; /* completion for synchronized locking */
	struct completion completion; /* completion for synchronized locking */
	void (*bast)(void *arg, int mode); /* blocking AST function pointer*/
	void (*bast)(void *arg, int mode); /* blocking AST function pointer*/
	struct mddev *mddev; /* pointing back to mddev. */
	struct mddev *mddev; /* pointing back to mddev. */
	int mode;
};
};


struct suspend_info {
struct suspend_info {
@@ -53,8 +54,8 @@ struct md_cluster_info {
	dlm_lockspace_t *lockspace;
	dlm_lockspace_t *lockspace;
	int slot_number;
	int slot_number;
	struct completion completion;
	struct completion completion;
	struct mutex sb_mutex;
	struct dlm_lock_resource *bitmap_lockres;
	struct dlm_lock_resource *bitmap_lockres;
	struct dlm_lock_resource *resync_lockres;
	struct list_head suspend_list;
	struct list_head suspend_list;
	spinlock_t suspend_lock;
	spinlock_t suspend_lock;
	struct md_thread *recovery_thread;
	struct md_thread *recovery_thread;
@@ -79,20 +80,20 @@ enum msg_type {
};
};


struct cluster_msg {
struct cluster_msg {
	int type;
	__le32 type;
	int slot;
	__le32 slot;
	/* TODO: Unionize this for smaller footprint */
	/* TODO: Unionize this for smaller footprint */
	sector_t low;
	__le64 low;
	sector_t high;
	__le64 high;
	char uuid[16];
	char uuid[16];
	int raid_slot;
	__le32 raid_slot;
};
};


static void sync_ast(void *arg)
static void sync_ast(void *arg)
{
{
	struct dlm_lock_resource *res;
	struct dlm_lock_resource *res;


	res = (struct dlm_lock_resource *) arg;
	res = arg;
	complete(&res->completion);
	complete(&res->completion);
}
}


@@ -106,6 +107,8 @@ static int dlm_lock_sync(struct dlm_lock_resource *res, int mode)
	if (ret)
	if (ret)
		return ret;
		return ret;
	wait_for_completion(&res->completion);
	wait_for_completion(&res->completion);
	if (res->lksb.sb_status == 0)
		res->mode = mode;
	return res->lksb.sb_status;
	return res->lksb.sb_status;
}
}


@@ -127,6 +130,7 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
	init_completion(&res->completion);
	init_completion(&res->completion);
	res->ls = cinfo->lockspace;
	res->ls = cinfo->lockspace;
	res->mddev = mddev;
	res->mddev = mddev;
	res->mode = DLM_LOCK_IV;
	namelen = strlen(name);
	namelen = strlen(name);
	res->name = kzalloc(namelen + 1, GFP_KERNEL);
	res->name = kzalloc(namelen + 1, GFP_KERNEL);
	if (!res->name) {
	if (!res->name) {
@@ -191,7 +195,7 @@ static void lockres_free(struct dlm_lock_resource *res)
	kfree(res);
	kfree(res);
}
}


static void add_resync_info(struct mddev *mddev, struct dlm_lock_resource *lockres,
static void add_resync_info(struct dlm_lock_resource *lockres,
			    sector_t lo, sector_t hi)
			    sector_t lo, sector_t hi)
{
{
	struct resync_info *ri;
	struct resync_info *ri;
@@ -210,7 +214,7 @@ static struct suspend_info *read_resync_info(struct mddev *mddev, struct dlm_loc
	dlm_lock_sync(lockres, DLM_LOCK_CR);
	dlm_lock_sync(lockres, DLM_LOCK_CR);
	memcpy(&ri, lockres->lksb.sb_lvbptr, sizeof(struct resync_info));
	memcpy(&ri, lockres->lksb.sb_lvbptr, sizeof(struct resync_info));
	hi = le64_to_cpu(ri.hi);
	hi = le64_to_cpu(ri.hi);
	if (ri.hi > 0) {
	if (hi > 0) {
		s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
		s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
		if (!s)
		if (!s)
			goto out;
			goto out;
@@ -345,7 +349,7 @@ static const struct dlm_lockspace_ops md_ls_ops = {
 */
 */
static void ack_bast(void *arg, int mode)
static void ack_bast(void *arg, int mode)
{
{
	struct dlm_lock_resource *res = (struct dlm_lock_resource *)arg;
	struct dlm_lock_resource *res = arg;
	struct md_cluster_info *cinfo = res->mddev->cluster_info;
	struct md_cluster_info *cinfo = res->mddev->cluster_info;


	if (mode == DLM_LOCK_EX)
	if (mode == DLM_LOCK_EX)
@@ -358,29 +362,32 @@ static void __remove_suspend_info(struct md_cluster_info *cinfo, int slot)


	list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list)
	list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list)
		if (slot == s->slot) {
		if (slot == s->slot) {
			pr_info("%s:%d Deleting suspend_info: %d\n",
					__func__, __LINE__, slot);
			list_del(&s->list);
			list_del(&s->list);
			kfree(s);
			kfree(s);
			break;
			break;
		}
		}
}
}


static void remove_suspend_info(struct md_cluster_info *cinfo, int slot)
static void remove_suspend_info(struct mddev *mddev, int slot)
{
{
	struct md_cluster_info *cinfo = mddev->cluster_info;
	spin_lock_irq(&cinfo->suspend_lock);
	spin_lock_irq(&cinfo->suspend_lock);
	__remove_suspend_info(cinfo, slot);
	__remove_suspend_info(cinfo, slot);
	spin_unlock_irq(&cinfo->suspend_lock);
	spin_unlock_irq(&cinfo->suspend_lock);
	mddev->pers->quiesce(mddev, 2);
}
}




static void process_suspend_info(struct md_cluster_info *cinfo,
static void process_suspend_info(struct mddev *mddev,
		int slot, sector_t lo, sector_t hi)
		int slot, sector_t lo, sector_t hi)
{
{
	struct md_cluster_info *cinfo = mddev->cluster_info;
	struct suspend_info *s;
	struct suspend_info *s;


	if (!hi) {
	if (!hi) {
		remove_suspend_info(cinfo, slot);
		remove_suspend_info(mddev, slot);
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		md_wakeup_thread(mddev->thread);
		return;
		return;
	}
	}
	s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
	s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
@@ -389,11 +396,14 @@ static void process_suspend_info(struct md_cluster_info *cinfo,
	s->slot = slot;
	s->slot = slot;
	s->lo = lo;
	s->lo = lo;
	s->hi = hi;
	s->hi = hi;
	mddev->pers->quiesce(mddev, 1);
	mddev->pers->quiesce(mddev, 0);
	spin_lock_irq(&cinfo->suspend_lock);
	spin_lock_irq(&cinfo->suspend_lock);
	/* Remove existing entry (if exists) before adding */
	/* Remove existing entry (if exists) before adding */
	__remove_suspend_info(cinfo, slot);
	__remove_suspend_info(cinfo, slot);
	list_add(&s->list, &cinfo->suspend_list);
	list_add(&s->list, &cinfo->suspend_list);
	spin_unlock_irq(&cinfo->suspend_lock);
	spin_unlock_irq(&cinfo->suspend_lock);
	mddev->pers->quiesce(mddev, 2);
}
}


static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)
static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)
@@ -407,7 +417,7 @@ static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)


	len = snprintf(disk_uuid, 64, "DEVICE_UUID=");
	len = snprintf(disk_uuid, 64, "DEVICE_UUID=");
	sprintf(disk_uuid + len, "%pU", cmsg->uuid);
	sprintf(disk_uuid + len, "%pU", cmsg->uuid);
	snprintf(raid_slot, 16, "RAID_DISK=%d", cmsg->raid_slot);
	snprintf(raid_slot, 16, "RAID_DISK=%d", le32_to_cpu(cmsg->raid_slot));
	pr_info("%s:%d Sending kobject change with %s and %s\n", __func__, __LINE__, disk_uuid, raid_slot);
	pr_info("%s:%d Sending kobject change with %s and %s\n", __func__, __LINE__, disk_uuid, raid_slot);
	init_completion(&cinfo->newdisk_completion);
	init_completion(&cinfo->newdisk_completion);
	set_bit(MD_CLUSTER_WAITING_FOR_NEWDISK, &cinfo->state);
	set_bit(MD_CLUSTER_WAITING_FOR_NEWDISK, &cinfo->state);
@@ -421,64 +431,59 @@ static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)
static void process_metadata_update(struct mddev *mddev, struct cluster_msg *msg)
static void process_metadata_update(struct mddev *mddev, struct cluster_msg *msg)
{
{
	struct md_cluster_info *cinfo = mddev->cluster_info;
	struct md_cluster_info *cinfo = mddev->cluster_info;

	md_reload_sb(mddev, le32_to_cpu(msg->raid_slot));
	md_reload_sb(mddev);
	dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR);
	dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR);
}
}


static void process_remove_disk(struct mddev *mddev, struct cluster_msg *msg)
static void process_remove_disk(struct mddev *mddev, struct cluster_msg *msg)
{
{
	struct md_rdev *rdev = md_find_rdev_nr_rcu(mddev, msg->raid_slot);
	struct md_rdev *rdev = md_find_rdev_nr_rcu(mddev,
						   le32_to_cpu(msg->raid_slot));


	if (rdev)
	if (rdev)
		md_kick_rdev_from_array(rdev);
		md_kick_rdev_from_array(rdev);
	else
	else
		pr_warn("%s: %d Could not find disk(%d) to REMOVE\n", __func__, __LINE__, msg->raid_slot);
		pr_warn("%s: %d Could not find disk(%d) to REMOVE\n",
			__func__, __LINE__, le32_to_cpu(msg->raid_slot));
}
}


static void process_readd_disk(struct mddev *mddev, struct cluster_msg *msg)
static void process_readd_disk(struct mddev *mddev, struct cluster_msg *msg)
{
{
	struct md_rdev *rdev = md_find_rdev_nr_rcu(mddev, msg->raid_slot);
	struct md_rdev *rdev = md_find_rdev_nr_rcu(mddev,
						   le32_to_cpu(msg->raid_slot));


	if (rdev && test_bit(Faulty, &rdev->flags))
	if (rdev && test_bit(Faulty, &rdev->flags))
		clear_bit(Faulty, &rdev->flags);
		clear_bit(Faulty, &rdev->flags);
	else
	else
		pr_warn("%s: %d Could not find disk(%d) which is faulty", __func__, __LINE__, msg->raid_slot);
		pr_warn("%s: %d Could not find disk(%d) which is faulty",
			__func__, __LINE__, le32_to_cpu(msg->raid_slot));
}
}


static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
{
{
	switch (msg->type) {
	if (WARN(mddev->cluster_info->slot_number - 1 == le32_to_cpu(msg->slot),
		"node %d received it's own msg\n", le32_to_cpu(msg->slot)))
		return;
	switch (le32_to_cpu(msg->type)) {
	case METADATA_UPDATED:
	case METADATA_UPDATED:
		pr_info("%s: %d Received message: METADATA_UPDATE from %d\n",
			__func__, __LINE__, msg->slot);
		process_metadata_update(mddev, msg);
		process_metadata_update(mddev, msg);
		break;
		break;
	case RESYNCING:
	case RESYNCING:
		pr_info("%s: %d Received message: RESYNCING from %d\n",
		process_suspend_info(mddev, le32_to_cpu(msg->slot),
			__func__, __LINE__, msg->slot);
				     le64_to_cpu(msg->low),
		process_suspend_info(mddev->cluster_info, msg->slot,
				     le64_to_cpu(msg->high));
				msg->low, msg->high);
		break;
		break;
	case NEWDISK:
	case NEWDISK:
		pr_info("%s: %d Received message: NEWDISK from %d\n",
			__func__, __LINE__, msg->slot);
		process_add_new_disk(mddev, msg);
		process_add_new_disk(mddev, msg);
		break;
		break;
	case REMOVE:
	case REMOVE:
		pr_info("%s: %d Received REMOVE from %d\n",
			__func__, __LINE__, msg->slot);
		process_remove_disk(mddev, msg);
		process_remove_disk(mddev, msg);
		break;
		break;
	case RE_ADD:
	case RE_ADD:
		pr_info("%s: %d Received RE_ADD from %d\n",
			__func__, __LINE__, msg->slot);
		process_readd_disk(mddev, msg);
		process_readd_disk(mddev, msg);
		break;
		break;
	case BITMAP_NEEDS_SYNC:
	case BITMAP_NEEDS_SYNC:
		pr_info("%s: %d Received BITMAP_NEEDS_SYNC from %d\n",
		__recover_slot(mddev, le32_to_cpu(msg->slot));
			__func__, __LINE__, msg->slot);
		__recover_slot(mddev, msg->slot);
		break;
		break;
	default:
	default:
		pr_warn("%s:%d Received unknown message from %d\n",
		pr_warn("%s:%d Received unknown message from %d\n",
@@ -528,11 +533,17 @@ static void recv_daemon(struct md_thread *thread)
/* lock_comm()
/* lock_comm()
 * Takes the lock on the TOKEN lock resource so no other
 * Takes the lock on the TOKEN lock resource so no other
 * node can communicate while the operation is underway.
 * node can communicate while the operation is underway.
 * If called again, and the TOKEN lock is already in EX mode
 * return success. However, care must be taken that unlock_comm()
 * is called only once.
 */
 */
static int lock_comm(struct md_cluster_info *cinfo)
static int lock_comm(struct md_cluster_info *cinfo)
{
{
	int error;
	int error;


	if (cinfo->token_lockres->mode == DLM_LOCK_EX)
		return 0;

	error = dlm_lock_sync(cinfo->token_lockres, DLM_LOCK_EX);
	error = dlm_lock_sync(cinfo->token_lockres, DLM_LOCK_EX);
	if (error)
	if (error)
		pr_err("md-cluster(%s:%d): failed to get EX on TOKEN (%d)\n",
		pr_err("md-cluster(%s:%d): failed to get EX on TOKEN (%d)\n",
@@ -542,6 +553,7 @@ static int lock_comm(struct md_cluster_info *cinfo)


static void unlock_comm(struct md_cluster_info *cinfo)
static void unlock_comm(struct md_cluster_info *cinfo)
{
{
	WARN_ON(cinfo->token_lockres->mode != DLM_LOCK_EX);
	dlm_unlock_sync(cinfo->token_lockres);
	dlm_unlock_sync(cinfo->token_lockres);
}
}


@@ -696,7 +708,6 @@ static int join(struct mddev *mddev, int nodes)
	init_completion(&cinfo->completion);
	init_completion(&cinfo->completion);
	set_bit(MD_CLUSTER_BEGIN_JOIN_CLUSTER, &cinfo->state);
	set_bit(MD_CLUSTER_BEGIN_JOIN_CLUSTER, &cinfo->state);


	mutex_init(&cinfo->sb_mutex);
	mddev->cluster_info = cinfo;
	mddev->cluster_info = cinfo;


	memset(str, 0, 64);
	memset(str, 0, 64);
@@ -753,6 +764,10 @@ static int join(struct mddev *mddev, int nodes)
		goto err;
		goto err;
	}
	}


	cinfo->resync_lockres = lockres_init(mddev, "resync", NULL, 0);
	if (!cinfo->resync_lockres)
		goto err;

	ret = gather_all_resync_info(mddev, nodes);
	ret = gather_all_resync_info(mddev, nodes);
	if (ret)
	if (ret)
		goto err;
		goto err;
@@ -763,6 +778,7 @@ static int join(struct mddev *mddev, int nodes)
	lockres_free(cinfo->token_lockres);
	lockres_free(cinfo->token_lockres);
	lockres_free(cinfo->ack_lockres);
	lockres_free(cinfo->ack_lockres);
	lockres_free(cinfo->no_new_dev_lockres);
	lockres_free(cinfo->no_new_dev_lockres);
	lockres_free(cinfo->resync_lockres);
	lockres_free(cinfo->bitmap_lockres);
	lockres_free(cinfo->bitmap_lockres);
	if (cinfo->lockspace)
	if (cinfo->lockspace)
		dlm_release_lockspace(cinfo->lockspace, 2);
		dlm_release_lockspace(cinfo->lockspace, 2);
@@ -771,12 +787,32 @@ static int join(struct mddev *mddev, int nodes)
	return ret;
	return ret;
}
}


static void resync_bitmap(struct mddev *mddev)
{
	struct md_cluster_info *cinfo = mddev->cluster_info;
	struct cluster_msg cmsg = {0};
	int err;

	cmsg.type = cpu_to_le32(BITMAP_NEEDS_SYNC);
	err = sendmsg(cinfo, &cmsg);
	if (err)
		pr_err("%s:%d: failed to send BITMAP_NEEDS_SYNC message (%d)\n",
			__func__, __LINE__, err);
}

static int leave(struct mddev *mddev)
static int leave(struct mddev *mddev)
{
{
	struct md_cluster_info *cinfo = mddev->cluster_info;
	struct md_cluster_info *cinfo = mddev->cluster_info;


	if (!cinfo)
	if (!cinfo)
		return 0;
		return 0;

	/* BITMAP_NEEDS_SYNC message should be sent when node
	 * is leaving the cluster with dirty bitmap, also we
	 * can only deliver it when dlm connection is available */
	if (cinfo->slot_number > 0 && mddev->recovery_cp != MaxSector)
		resync_bitmap(mddev);

	md_unregister_thread(&cinfo->recovery_thread);
	md_unregister_thread(&cinfo->recovery_thread);
	md_unregister_thread(&cinfo->recv_thread);
	md_unregister_thread(&cinfo->recv_thread);
	lockres_free(cinfo->message_lockres);
	lockres_free(cinfo->message_lockres);
@@ -799,15 +835,6 @@ static int slot_number(struct mddev *mddev)
	return cinfo->slot_number - 1;
	return cinfo->slot_number - 1;
}
}


static void resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
{
	struct md_cluster_info *cinfo = mddev->cluster_info;

	add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi);
	/* Re-acquire the lock to refresh LVB */
	dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW);
}

static int metadata_update_start(struct mddev *mddev)
static int metadata_update_start(struct mddev *mddev)
{
{
	return lock_comm(mddev->cluster_info);
	return lock_comm(mddev->cluster_info);
@@ -817,59 +844,62 @@ static int metadata_update_finish(struct mddev *mddev)
{
{
	struct md_cluster_info *cinfo = mddev->cluster_info;
	struct md_cluster_info *cinfo = mddev->cluster_info;
	struct cluster_msg cmsg;
	struct cluster_msg cmsg;
	int ret;
	struct md_rdev *rdev;
	int ret = 0;
	int raid_slot = -1;


	memset(&cmsg, 0, sizeof(cmsg));
	memset(&cmsg, 0, sizeof(cmsg));
	cmsg.type = cpu_to_le32(METADATA_UPDATED);
	cmsg.type = cpu_to_le32(METADATA_UPDATED);
	/* Pick up a good active device number to send.
	 */
	rdev_for_each(rdev, mddev)
		if (rdev->raid_disk > -1 && !test_bit(Faulty, &rdev->flags)) {
			raid_slot = rdev->desc_nr;
			break;
		}
	if (raid_slot >= 0) {
		cmsg.raid_slot = cpu_to_le32(raid_slot);
		ret = __sendmsg(cinfo, &cmsg);
		ret = __sendmsg(cinfo, &cmsg);
	} else
		pr_warn("md-cluster: No good device id found to send\n");
	unlock_comm(cinfo);
	unlock_comm(cinfo);
	return ret;
	return ret;
}
}


static int metadata_update_cancel(struct mddev *mddev)
static void metadata_update_cancel(struct mddev *mddev)
{
{
	struct md_cluster_info *cinfo = mddev->cluster_info;
	struct md_cluster_info *cinfo = mddev->cluster_info;
	unlock_comm(cinfo);
}


	return dlm_unlock_sync(cinfo->token_lockres);
static int resync_start(struct mddev *mddev)
{
	struct md_cluster_info *cinfo = mddev->cluster_info;
	cinfo->resync_lockres->flags |= DLM_LKF_NOQUEUE;
	return dlm_lock_sync(cinfo->resync_lockres, DLM_LOCK_EX);
}
}


static int resync_send(struct mddev *mddev, enum msg_type type,
static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
		sector_t lo, sector_t hi)
{
{
	struct md_cluster_info *cinfo = mddev->cluster_info;
	struct md_cluster_info *cinfo = mddev->cluster_info;
	struct cluster_msg cmsg;
	struct cluster_msg cmsg = {0};
	int slot = cinfo->slot_number - 1;


	pr_info("%s:%d lo: %llu hi: %llu\n", __func__, __LINE__,
	add_resync_info(cinfo->bitmap_lockres, lo, hi);
			(unsigned long long)lo,
	/* Re-acquire the lock to refresh LVB */
			(unsigned long long)hi);
	dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW);
	resync_info_update(mddev, lo, hi);
	cmsg.type = cpu_to_le32(RESYNCING);
	cmsg.type = cpu_to_le32(type);
	cmsg.slot = cpu_to_le32(slot);
	cmsg.low = cpu_to_le64(lo);
	cmsg.low = cpu_to_le64(lo);
	cmsg.high = cpu_to_le64(hi);
	cmsg.high = cpu_to_le64(hi);
	return sendmsg(cinfo, &cmsg);
}


static int resync_start(struct mddev *mddev, sector_t lo, sector_t hi)
	return sendmsg(cinfo, &cmsg);
{
	pr_info("%s:%d\n", __func__, __LINE__);
	return resync_send(mddev, RESYNCING, lo, hi);
}
}


static void resync_finish(struct mddev *mddev)
static int resync_finish(struct mddev *mddev)
{
{
	struct md_cluster_info *cinfo = mddev->cluster_info;
	struct md_cluster_info *cinfo = mddev->cluster_info;
	struct cluster_msg cmsg;
	cinfo->resync_lockres->flags &= ~DLM_LKF_NOQUEUE;
	int slot = cinfo->slot_number - 1;
	dlm_unlock_sync(cinfo->resync_lockres);

	return resync_info_update(mddev, 0, 0);
	pr_info("%s:%d\n", __func__, __LINE__);
	resync_send(mddev, RESYNCING, 0, 0);
	if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
		cmsg.type = cpu_to_le32(BITMAP_NEEDS_SYNC);
		cmsg.slot = cpu_to_le32(slot);
		sendmsg(cinfo, &cmsg);
	}
}
}


static int area_resyncing(struct mddev *mddev, int direction,
static int area_resyncing(struct mddev *mddev, int direction,
@@ -896,7 +926,11 @@ static int area_resyncing(struct mddev *mddev, int direction,
	return ret;
	return ret;
}
}


static int add_new_disk_start(struct mddev *mddev, struct md_rdev *rdev)
/* add_new_disk() - initiates a disk add
 * However, if this fails before writing md_update_sb(),
 * add_new_disk_cancel() must be called to release token lock
 */
static int add_new_disk(struct mddev *mddev, struct md_rdev *rdev)
{
{
	struct md_cluster_info *cinfo = mddev->cluster_info;
	struct md_cluster_info *cinfo = mddev->cluster_info;
	struct cluster_msg cmsg;
	struct cluster_msg cmsg;
@@ -907,7 +941,7 @@ static int add_new_disk_start(struct mddev *mddev, struct md_rdev *rdev)
	memset(&cmsg, 0, sizeof(cmsg));
	memset(&cmsg, 0, sizeof(cmsg));
	cmsg.type = cpu_to_le32(NEWDISK);
	cmsg.type = cpu_to_le32(NEWDISK);
	memcpy(cmsg.uuid, uuid, 16);
	memcpy(cmsg.uuid, uuid, 16);
	cmsg.raid_slot = rdev->desc_nr;
	cmsg.raid_slot = cpu_to_le32(rdev->desc_nr);
	lock_comm(cinfo);
	lock_comm(cinfo);
	ret = __sendmsg(cinfo, &cmsg);
	ret = __sendmsg(cinfo, &cmsg);
	if (ret)
	if (ret)
@@ -918,22 +952,17 @@ static int add_new_disk_start(struct mddev *mddev, struct md_rdev *rdev)
	/* Some node does not "see" the device */
	/* Some node does not "see" the device */
	if (ret == -EAGAIN)
	if (ret == -EAGAIN)
		ret = -ENOENT;
		ret = -ENOENT;
	if (ret)
		unlock_comm(cinfo);
	else
	else
		dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR);
		dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR);
	return ret;
	return ret;
}
}


static int add_new_disk_finish(struct mddev *mddev)
static void add_new_disk_cancel(struct mddev *mddev)
{
{
	struct cluster_msg cmsg;
	struct md_cluster_info *cinfo = mddev->cluster_info;
	struct md_cluster_info *cinfo = mddev->cluster_info;
	int ret;
	/* Write sb and inform others */
	md_update_sb(mddev, 1);
	cmsg.type = METADATA_UPDATED;
	ret = __sendmsg(cinfo, &cmsg);
	unlock_comm(cinfo);
	unlock_comm(cinfo);
	return ret;
}
}


static int new_disk_ack(struct mddev *mddev, bool ack)
static int new_disk_ack(struct mddev *mddev, bool ack)
@@ -953,10 +982,10 @@ static int new_disk_ack(struct mddev *mddev, bool ack)


static int remove_disk(struct mddev *mddev, struct md_rdev *rdev)
static int remove_disk(struct mddev *mddev, struct md_rdev *rdev)
{
{
	struct cluster_msg cmsg;
	struct cluster_msg cmsg = {0};
	struct md_cluster_info *cinfo = mddev->cluster_info;
	struct md_cluster_info *cinfo = mddev->cluster_info;
	cmsg.type = REMOVE;
	cmsg.type = cpu_to_le32(REMOVE);
	cmsg.raid_slot = rdev->desc_nr;
	cmsg.raid_slot = cpu_to_le32(rdev->desc_nr);
	return __sendmsg(cinfo, &cmsg);
	return __sendmsg(cinfo, &cmsg);
}
}


@@ -964,12 +993,12 @@ static int gather_bitmaps(struct md_rdev *rdev)
{
{
	int sn, err;
	int sn, err;
	sector_t lo, hi;
	sector_t lo, hi;
	struct cluster_msg cmsg;
	struct cluster_msg cmsg = {0};
	struct mddev *mddev = rdev->mddev;
	struct mddev *mddev = rdev->mddev;
	struct md_cluster_info *cinfo = mddev->cluster_info;
	struct md_cluster_info *cinfo = mddev->cluster_info;


	cmsg.type = RE_ADD;
	cmsg.type = cpu_to_le32(RE_ADD);
	cmsg.raid_slot = rdev->desc_nr;
	cmsg.raid_slot = cpu_to_le32(rdev->desc_nr);
	err = sendmsg(cinfo, &cmsg);
	err = sendmsg(cinfo, &cmsg);
	if (err)
	if (err)
		goto out;
		goto out;
@@ -993,15 +1022,15 @@ static struct md_cluster_operations cluster_ops = {
	.join   = join,
	.join   = join,
	.leave  = leave,
	.leave  = leave,
	.slot_number = slot_number,
	.slot_number = slot_number,
	.resync_info_update = resync_info_update,
	.resync_start = resync_start,
	.resync_start = resync_start,
	.resync_finish = resync_finish,
	.resync_finish = resync_finish,
	.resync_info_update = resync_info_update,
	.metadata_update_start = metadata_update_start,
	.metadata_update_start = metadata_update_start,
	.metadata_update_finish = metadata_update_finish,
	.metadata_update_finish = metadata_update_finish,
	.metadata_update_cancel = metadata_update_cancel,
	.metadata_update_cancel = metadata_update_cancel,
	.area_resyncing = area_resyncing,
	.area_resyncing = area_resyncing,
	.add_new_disk_start = add_new_disk_start,
	.add_new_disk = add_new_disk,
	.add_new_disk_finish = add_new_disk_finish,
	.add_new_disk_cancel = add_new_disk_cancel,
	.new_disk_ack = new_disk_ack,
	.new_disk_ack = new_disk_ack,
	.remove_disk = remove_disk,
	.remove_disk = remove_disk,
	.gather_bitmaps = gather_bitmaps,
	.gather_bitmaps = gather_bitmaps,
@@ -1022,5 +1051,6 @@ static void cluster_exit(void)


module_init(cluster_init);
module_init(cluster_init);
module_exit(cluster_exit);
module_exit(cluster_exit);
MODULE_AUTHOR("SUSE");
MODULE_LICENSE("GPL");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Clustering support for MD");
MODULE_DESCRIPTION("Clustering support for MD");
+6 −6
Original line number Original line Diff line number Diff line
@@ -12,15 +12,15 @@ struct md_cluster_operations {
	int (*join)(struct mddev *mddev, int nodes);
	int (*join)(struct mddev *mddev, int nodes);
	int (*leave)(struct mddev *mddev);
	int (*leave)(struct mddev *mddev);
	int (*slot_number)(struct mddev *mddev);
	int (*slot_number)(struct mddev *mddev);
	void (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi);
	int (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi);
	int (*resync_start)(struct mddev *mddev, sector_t lo, sector_t hi);
	void (*resync_finish)(struct mddev *mddev);
	int (*metadata_update_start)(struct mddev *mddev);
	int (*metadata_update_start)(struct mddev *mddev);
	int (*metadata_update_finish)(struct mddev *mddev);
	int (*metadata_update_finish)(struct mddev *mddev);
	int (*metadata_update_cancel)(struct mddev *mddev);
	void (*metadata_update_cancel)(struct mddev *mddev);
	int (*resync_start)(struct mddev *mddev);
	int (*resync_finish)(struct mddev *mddev);
	int (*area_resyncing)(struct mddev *mddev, int direction, sector_t lo, sector_t hi);
	int (*area_resyncing)(struct mddev *mddev, int direction, sector_t lo, sector_t hi);
	int (*add_new_disk_start)(struct mddev *mddev, struct md_rdev *rdev);
	int (*add_new_disk)(struct mddev *mddev, struct md_rdev *rdev);
	int (*add_new_disk_finish)(struct mddev *mddev);
	void (*add_new_disk_cancel)(struct mddev *mddev);
	int (*new_disk_ack)(struct mddev *mddev, bool ack);
	int (*new_disk_ack)(struct mddev *mddev, bool ack);
	int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev);
	int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev);
	int (*gather_bitmaps)(struct md_rdev *rdev);
	int (*gather_bitmaps)(struct md_rdev *rdev);
Loading