Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit feaa7cb5 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull MD updates from Shaohua Li:
 "Several patches from Guoqing fixing md-cluster bugs and several
  patches from Heinz fixing dm-raid bugs"

* tag 'md/4.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  md-cluster: check the return value of process_recvd_msg
  md-cluster: gather resync infos and enable recv_thread after bitmap is ready
  md: set MD_CHANGE_PENDING in a atomic region
  md: raid5: add prerequisite to run underneath dm-raid
  md: raid10: add prerequisite to run underneath dm-raid
  md: md.c: fix oops in mddev_suspend for raid0
  md-cluster: fix ifnullfree.cocci warnings
  md-cluster/bitmap: unplug bitmap to sync dirty pages to disk
  md-cluster/bitmap: fix wrong page num in bitmap_file_clear_bit and bitmap_file_set_bit
  md-cluster/bitmap: fix wrong calcuation of offset
  md-cluster: sync bitmap when node received RESYNCING msg
  md-cluster: always setup in-memory bitmap
  md-cluster: wakeup thread if activated a spare disk
  md-cluster: change array_sectors and update size are not supported
  md-cluster: fix locking when node joins cluster during message broadcast
  md-cluster: unregister thread if err happened
  md-cluster: wake up thread to continue recovery
  md-cluser: make resync_finish only called after pers->sync_request
  md-cluster: change resync lock from asynchronous to synchronous
parents e0fb1b36 1fa9a1ad
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -316,3 +316,9 @@ The algorithm is:
 nodes are using the raid which is achieved by lock all bitmap
 locks within the cluster, and also those locks are unlocked
 accordingly.

7. Unsupported features

There are somethings which are not supported by cluster MD yet.

- update size and change array_sectors.
+77 −11
Original line number Diff line number Diff line
@@ -46,7 +46,7 @@ static inline char *bmname(struct bitmap *bitmap)
 * allocated while we're using it
 */
static int bitmap_checkpage(struct bitmap_counts *bitmap,
			    unsigned long page, int create)
			    unsigned long page, int create, int no_hijack)
__releases(bitmap->lock)
__acquires(bitmap->lock)
{
@@ -90,6 +90,9 @@ __acquires(bitmap->lock)

	if (mappage == NULL) {
		pr_debug("md/bitmap: map page allocation failed, hijacking\n");
		/* We don't support hijack for cluster raid */
		if (no_hijack)
			return -ENOMEM;
		/* failed - set the hijacked flag so that we can use the
		 * pointer as a counter */
		if (!bitmap->bp[page].map)
@@ -756,7 +759,7 @@ static int bitmap_storage_alloc(struct bitmap_storage *store,
		bytes += sizeof(bitmap_super_t);

	num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
	offset = slot_number * (num_pages - 1);
	offset = slot_number * num_pages;

	store->filemap = kmalloc(sizeof(struct page *)
				 * num_pages, GFP_KERNEL);
@@ -900,6 +903,11 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
	struct page *page;
	void *kaddr;
	unsigned long chunk = block >> bitmap->counts.chunkshift;
	struct bitmap_storage *store = &bitmap->storage;
	unsigned long node_offset = 0;

	if (mddev_is_clustered(bitmap->mddev))
		node_offset = bitmap->cluster_slot * store->file_pages;

	page = filemap_get_page(&bitmap->storage, chunk);
	if (!page)
@@ -915,7 +923,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
	kunmap_atomic(kaddr);
	pr_debug("set file bit %lu page %lu\n", bit, page->index);
	/* record page number so it gets flushed to disk when unplug occurs */
	set_page_attr(bitmap, page->index, BITMAP_PAGE_DIRTY);
	set_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_DIRTY);
}

static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
@@ -924,6 +932,11 @@ static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
	struct page *page;
	void *paddr;
	unsigned long chunk = block >> bitmap->counts.chunkshift;
	struct bitmap_storage *store = &bitmap->storage;
	unsigned long node_offset = 0;

	if (mddev_is_clustered(bitmap->mddev))
		node_offset = bitmap->cluster_slot * store->file_pages;

	page = filemap_get_page(&bitmap->storage, chunk);
	if (!page)
@@ -935,8 +948,8 @@ static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
	else
		clear_bit_le(bit, paddr);
	kunmap_atomic(paddr);
	if (!test_page_attr(bitmap, page->index, BITMAP_PAGE_NEEDWRITE)) {
		set_page_attr(bitmap, page->index, BITMAP_PAGE_PENDING);
	if (!test_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_NEEDWRITE)) {
		set_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_PENDING);
		bitmap->allclean = 0;
	}
}
@@ -1321,7 +1334,7 @@ __acquires(bitmap->lock)
	sector_t csize;
	int err;

	err = bitmap_checkpage(bitmap, page, create);
	err = bitmap_checkpage(bitmap, page, create, 0);

	if (bitmap->bp[page].hijacked ||
	    bitmap->bp[page].map == NULL)
@@ -1594,6 +1607,27 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force)
}
EXPORT_SYMBOL(bitmap_cond_end_sync);

void bitmap_sync_with_cluster(struct mddev *mddev,
			      sector_t old_lo, sector_t old_hi,
			      sector_t new_lo, sector_t new_hi)
{
	struct bitmap *bitmap = mddev->bitmap;
	sector_t sector, blocks = 0;

	for (sector = old_lo; sector < new_lo; ) {
		bitmap_end_sync(bitmap, sector, &blocks, 0);
		sector += blocks;
	}
	WARN((blocks > new_lo) && old_lo, "alignment is not correct for lo\n");

	for (sector = old_hi; sector < new_hi; ) {
		bitmap_start_sync(bitmap, sector, &blocks, 0);
		sector += blocks;
	}
	WARN((blocks > new_hi) && old_hi, "alignment is not correct for hi\n");
}
EXPORT_SYMBOL(bitmap_sync_with_cluster);

static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
{
	/* For each chunk covered by any of these sectors, set the
@@ -1814,6 +1848,9 @@ int bitmap_load(struct mddev *mddev)
	if (!bitmap)
		goto out;

	if (mddev_is_clustered(mddev))
		md_cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes);

	/* Clear out old bitmap info first:  Either there is none, or we
	 * are resuming after someone else has possibly changed things,
	 * so we should forget old cached info.
@@ -1890,14 +1927,14 @@ int bitmap_copy_from_slot(struct mddev *mddev, int slot,

	if (clear_bits) {
		bitmap_update_sb(bitmap);
		/* Setting this for the ev_page should be enough.
		 * And we do not require both write_all and PAGE_DIRT either
		 */
		/* BITMAP_PAGE_PENDING is set, but bitmap_unplug needs
		 * BITMAP_PAGE_DIRTY or _NEEDWRITE to write ... */
		for (i = 0; i < bitmap->storage.file_pages; i++)
			set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
		bitmap_write_all(bitmap);
			if (test_page_attr(bitmap, i, BITMAP_PAGE_PENDING))
				set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE);
		bitmap_unplug(bitmap);
	}
	bitmap_unplug(mddev->bitmap);
	*low = lo;
	*high = hi;
err:
@@ -2032,6 +2069,35 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
		     chunks << chunkshift);

	spin_lock_irq(&bitmap->counts.lock);
	/* For cluster raid, need to pre-allocate bitmap */
	if (mddev_is_clustered(bitmap->mddev)) {
		unsigned long page;
		for (page = 0; page < pages; page++) {
			ret = bitmap_checkpage(&bitmap->counts, page, 1, 1);
			if (ret) {
				unsigned long k;

				/* deallocate the page memory */
				for (k = 0; k < page; k++) {
					kfree(new_bp[k].map);
				}

				/* restore some fields from old_counts */
				bitmap->counts.bp = old_counts.bp;
				bitmap->counts.pages = old_counts.pages;
				bitmap->counts.missing_pages = old_counts.pages;
				bitmap->counts.chunkshift = old_counts.chunkshift;
				bitmap->counts.chunks = old_counts.chunks;
				bitmap->mddev->bitmap_info.chunksize = 1 << (old_counts.chunkshift +
									     BITMAP_BLOCK_SHIFT);
				blocks = old_counts.chunks << old_counts.chunkshift;
				pr_err("Could not pre-allocate in-memory bitmap for cluster raid\n");
				break;
			} else
				bitmap->counts.bp[page].count += 1;
		}
	}

	for (block = 0; block < blocks; ) {
		bitmap_counter_t *bmc_old, *bmc_new;
		int set;
+3 −0
Original line number Diff line number Diff line
@@ -258,6 +258,9 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted);
void bitmap_close_sync(struct bitmap *bitmap);
void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force);
void bitmap_sync_with_cluster(struct mddev *mddev,
			      sector_t old_lo, sector_t old_hi,
			      sector_t new_lo, sector_t new_hi);

void bitmap_unplug(struct bitmap *bitmap);
void bitmap_daemon_work(struct mddev *mddev);
+79 −17
Original line number Diff line number Diff line
@@ -61,6 +61,10 @@ struct resync_info {
 * the lock.
 */
#define		MD_CLUSTER_SEND_LOCKED_ALREADY		5
/* We should receive message after node joined cluster and
 * set up all the related infos such as bitmap and personality */
#define		MD_CLUSTER_ALREADY_IN_CLUSTER		6
#define		MD_CLUSTER_PENDING_RECV_EVENT		7


struct md_cluster_info {
@@ -85,6 +89,9 @@ struct md_cluster_info {
	struct completion newdisk_completion;
	wait_queue_head_t wait;
	unsigned long state;
	/* record the region in RESYNCING message */
	sector_t sync_low;
	sector_t sync_hi;
};

enum msg_type {
@@ -284,11 +291,14 @@ static void recover_bitmaps(struct md_thread *thread)
			goto dlm_unlock;
		}
		if (hi > 0) {
			/* TODO:Wait for current resync to get over */
			set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			if (lo < mddev->recovery_cp)
				mddev->recovery_cp = lo;
			md_check_recovery(mddev);
			/* wake up thread to continue resync in case resync
			 * is not finished */
			if (mddev->recovery_cp != MaxSector) {
			    set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			    md_wakeup_thread(mddev->thread);
			}
		}
dlm_unlock:
		dlm_unlock_sync(bm_lockres);
@@ -370,8 +380,12 @@ static void ack_bast(void *arg, int mode)
	struct dlm_lock_resource *res = arg;
	struct md_cluster_info *cinfo = res->mddev->cluster_info;

	if (mode == DLM_LOCK_EX)
	if (mode == DLM_LOCK_EX) {
		if (test_bit(MD_CLUSTER_ALREADY_IN_CLUSTER, &cinfo->state))
			md_wakeup_thread(cinfo->recv_thread);
		else
			set_bit(MD_CLUSTER_PENDING_RECV_EVENT, &cinfo->state);
	}
}

static void __remove_suspend_info(struct md_cluster_info *cinfo, int slot)
@@ -408,6 +422,30 @@ static void process_suspend_info(struct mddev *mddev,
		md_wakeup_thread(mddev->thread);
		return;
	}

	/*
	 * The bitmaps are not same for different nodes
	 * if RESYNCING is happening in one node, then
	 * the node which received the RESYNCING message
	 * probably will perform resync with the region
	 * [lo, hi] again, so we could reduce resync time
	 * a lot if we can ensure that the bitmaps among
	 * different nodes are match up well.
	 *
	 * sync_low/hi is used to record the region which
	 * arrived in the previous RESYNCING message,
	 *
	 * Call bitmap_sync_with_cluster to clear
	 * NEEDED_MASK and set RESYNC_MASK since
	 * resync thread is running in another node,
	 * so we don't need to do the resync again
	 * with the same section */
	bitmap_sync_with_cluster(mddev, cinfo->sync_low,
					cinfo->sync_hi,
					lo, hi);
	cinfo->sync_low = lo;
	cinfo->sync_hi = hi;

	s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
	if (!s)
		return;
@@ -482,11 +520,13 @@ static void process_readd_disk(struct mddev *mddev, struct cluster_msg *msg)
			__func__, __LINE__, le32_to_cpu(msg->raid_slot));
}

static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
{
	int ret = 0;

	if (WARN(mddev->cluster_info->slot_number - 1 == le32_to_cpu(msg->slot),
		"node %d received it's own msg\n", le32_to_cpu(msg->slot)))
		return;
		return -1;
	switch (le32_to_cpu(msg->type)) {
	case METADATA_UPDATED:
		process_metadata_update(mddev, msg);
@@ -509,9 +549,11 @@ static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
		__recover_slot(mddev, le32_to_cpu(msg->slot));
		break;
	default:
		ret = -1;
		pr_warn("%s:%d Received unknown message from %d\n",
			__func__, __LINE__, msg->slot);
	}
	return ret;
}

/*
@@ -535,7 +577,9 @@ static void recv_daemon(struct md_thread *thread)

	/* read lvb and wake up thread to process this message_lockres */
	memcpy(&msg, message_lockres->lksb.sb_lvbptr, sizeof(struct cluster_msg));
	process_recvd_msg(thread->mddev, &msg);
	ret = process_recvd_msg(thread->mddev, &msg);
	if (ret)
		goto out;

	/*release CR on ack_lockres*/
	ret = dlm_unlock_sync(ack_lockres);
@@ -549,6 +593,7 @@ static void recv_daemon(struct md_thread *thread)
	ret = dlm_lock_sync(ack_lockres, DLM_LOCK_CR);
	if (unlikely(ret != 0))
		pr_info("lock CR on ack failed return %d\n", ret);
out:
	/*release CR on message_lockres*/
	ret = dlm_unlock_sync(message_lockres);
	if (unlikely(ret != 0))
@@ -778,17 +823,24 @@ static int join(struct mddev *mddev, int nodes)
	cinfo->token_lockres = lockres_init(mddev, "token", NULL, 0);
	if (!cinfo->token_lockres)
		goto err;
	cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0);
	if (!cinfo->ack_lockres)
		goto err;
	cinfo->no_new_dev_lockres = lockres_init(mddev, "no-new-dev", NULL, 0);
	if (!cinfo->no_new_dev_lockres)
		goto err;

	ret = dlm_lock_sync(cinfo->token_lockres, DLM_LOCK_EX);
	if (ret) {
		ret = -EAGAIN;
		pr_err("md-cluster: can't join cluster to avoid lock issue\n");
		goto err;
	}
	cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0);
	if (!cinfo->ack_lockres)
		goto err;
	/* get sync CR lock on ACK. */
	if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR))
		pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n",
				ret);
	dlm_unlock_sync(cinfo->token_lockres);
	/* get sync CR lock on no-new-dev. */
	if (dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR))
		pr_err("md-cluster: failed to get a sync CR lock on no-new-dev!(%d)\n", ret);
@@ -809,12 +861,10 @@ static int join(struct mddev *mddev, int nodes)
	if (!cinfo->resync_lockres)
		goto err;

	ret = gather_all_resync_info(mddev, nodes);
	if (ret)
		goto err;

	return 0;
err:
	md_unregister_thread(&cinfo->recovery_thread);
	md_unregister_thread(&cinfo->recv_thread);
	lockres_free(cinfo->message_lockres);
	lockres_free(cinfo->token_lockres);
	lockres_free(cinfo->ack_lockres);
@@ -828,6 +878,19 @@ static int join(struct mddev *mddev, int nodes)
	return ret;
}

static void load_bitmaps(struct mddev *mddev, int total_slots)
{
	struct md_cluster_info *cinfo = mddev->cluster_info;

	/* load all the node's bitmap info for resync */
	if (gather_all_resync_info(mddev, total_slots))
		pr_err("md-cluster: failed to gather all resyn infos\n");
	set_bit(MD_CLUSTER_ALREADY_IN_CLUSTER, &cinfo->state);
	/* wake up recv thread in case something need to be handled */
	if (test_and_clear_bit(MD_CLUSTER_PENDING_RECV_EVENT, &cinfo->state))
		md_wakeup_thread(cinfo->recv_thread);
}

static void resync_bitmap(struct mddev *mddev)
{
	struct md_cluster_info *cinfo = mddev->cluster_info;
@@ -937,7 +1000,6 @@ static void metadata_update_cancel(struct mddev *mddev)
static int resync_start(struct mddev *mddev)
{
	struct md_cluster_info *cinfo = mddev->cluster_info;
	cinfo->resync_lockres->flags |= DLM_LKF_NOQUEUE;
	return dlm_lock_sync(cinfo->resync_lockres, DLM_LOCK_EX);
}

@@ -967,7 +1029,6 @@ static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
static int resync_finish(struct mddev *mddev)
{
	struct md_cluster_info *cinfo = mddev->cluster_info;
	cinfo->resync_lockres->flags &= ~DLM_LKF_NOQUEUE;
	dlm_unlock_sync(cinfo->resync_lockres);
	return resync_info_update(mddev, 0, 0);
}
@@ -1171,6 +1232,7 @@ static struct md_cluster_operations cluster_ops = {
	.add_new_disk_cancel = add_new_disk_cancel,
	.new_disk_ack = new_disk_ack,
	.remove_disk = remove_disk,
	.load_bitmaps = load_bitmaps,
	.gather_bitmaps = gather_bitmaps,
	.lock_all_bitmaps = lock_all_bitmaps,
	.unlock_all_bitmaps = unlock_all_bitmaps,
+1 −0
Original line number Diff line number Diff line
@@ -23,6 +23,7 @@ struct md_cluster_operations {
	void (*add_new_disk_cancel)(struct mddev *mddev);
	int (*new_disk_ack)(struct mddev *mddev, bool ack);
	int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev);
	void (*load_bitmaps)(struct mddev *mddev, int total_slots);
	int (*gather_bitmaps)(struct md_rdev *rdev);
	int (*lock_all_bitmaps)(struct mddev *mddev);
	void (*unlock_all_bitmaps)(struct mddev *mddev);
Loading