Merge tag 'md/4.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md (feaa7cb5) · Commits · e / devices / android_kernel_fairphone_FP3

Documentation/md-cluster.txt

+6 −0

Original line number	Diff line number	Diff line
		@@ -316,3 +316,9 @@ The algorithm is:
		nodes are using the raid which is achieved by lock all bitmap
		locks within the cluster, and also those locks are unlocked
		accordingly.

		7. Unsupported features

		There are some things which are not supported by cluster MD yet.

		- update size and change array_sectors.

drivers/md/bitmap.c

+77 −11

Original line number	Diff line number	Diff line
		@@ -46,7 +46,7 @@ static inline char *bmname(struct bitmap *bitmap)
		* allocated while we're using it
		*/
		static int bitmap_checkpage(struct bitmap_counts *bitmap,
		unsigned long page, int create)
		unsigned long page, int create, int no_hijack)
		__releases(bitmap->lock)
		__acquires(bitmap->lock)
		{
		@@ -90,6 +90,9 @@ __acquires(bitmap->lock)

		if (mappage == NULL) {
		pr_debug("md/bitmap: map page allocation failed, hijacking\n");
		/* We don't support hijack for cluster raid */
		if (no_hijack)
		return -ENOMEM;
		/* failed - set the hijacked flag so that we can use the
		* pointer as a counter */
		if (!bitmap->bp[page].map)
		@@ -756,7 +759,7 @@ static int bitmap_storage_alloc(struct bitmap_storage *store,
		bytes += sizeof(bitmap_super_t);

		num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
		offset = slot_number * (num_pages - 1);
		offset = slot_number * num_pages;

		store->filemap = kmalloc(sizeof(struct page *)
		* num_pages, GFP_KERNEL);
		@@ -900,6 +903,11 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
		struct page *page;
		void *kaddr;
		unsigned long chunk = block >> bitmap->counts.chunkshift;
		struct bitmap_storage *store = &bitmap->storage;
		unsigned long node_offset = 0;

		if (mddev_is_clustered(bitmap->mddev))
		node_offset = bitmap->cluster_slot * store->file_pages;

		page = filemap_get_page(&bitmap->storage, chunk);
		if (!page)
		@@ -915,7 +923,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
		kunmap_atomic(kaddr);
		pr_debug("set file bit %lu page %lu\n", bit, page->index);
		/* record page number so it gets flushed to disk when unplug occurs */
		set_page_attr(bitmap, page->index, BITMAP_PAGE_DIRTY);
		set_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_DIRTY);
		}

		static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
		@@ -924,6 +932,11 @@ static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
		struct page *page;
		void *paddr;
		unsigned long chunk = block >> bitmap->counts.chunkshift;
		struct bitmap_storage *store = &bitmap->storage;
		unsigned long node_offset = 0;

		if (mddev_is_clustered(bitmap->mddev))
		node_offset = bitmap->cluster_slot * store->file_pages;

		page = filemap_get_page(&bitmap->storage, chunk);
		if (!page)
		@@ -935,8 +948,8 @@ static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
		else
		clear_bit_le(bit, paddr);
		kunmap_atomic(paddr);
		if (!test_page_attr(bitmap, page->index, BITMAP_PAGE_NEEDWRITE)) {
		set_page_attr(bitmap, page->index, BITMAP_PAGE_PENDING);
		if (!test_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_NEEDWRITE)) {
		set_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_PENDING);
		bitmap->allclean = 0;
		}
		}
		@@ -1321,7 +1334,7 @@ __acquires(bitmap->lock)
		sector_t csize;
		int err;

		err = bitmap_checkpage(bitmap, page, create);
		err = bitmap_checkpage(bitmap, page, create, 0);

		if (bitmap->bp[page].hijacked ||
		bitmap->bp[page].map == NULL)
		@@ -1594,6 +1607,27 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force)
		}
		EXPORT_SYMBOL(bitmap_cond_end_sync);

		/*
		 * bitmap_sync_with_cluster - move the locally tracked resync window from
		 * [old_lo, old_hi] to [new_lo, new_hi].
		 *
		 * @mddev:  array whose mddev->bitmap is updated
		 * @old_lo: low bound of the previously recorded window
		 * @old_hi: high bound of the previously recorded window
		 * @new_lo: low bound of the newly reported window
		 * @new_hi: high bound of the newly reported window
		 *
		 * Sectors in [old_lo, new_lo) are passed to bitmap_end_sync() with
		 * aborted == 0, and sectors in [old_hi, new_hi) are passed to
		 * bitmap_start_sync() with a final argument of 0.
		 * NOTE(review): according to the comment at the md-cluster.c call site this
		 * is meant to clear NEEDED_MASK and set RESYNC_MASK for regions another
		 * node is resyncing - confirm against bitmap_end_sync/bitmap_start_sync.
		 */
		void bitmap_sync_with_cluster(struct mddev *mddev,
		sector_t old_lo, sector_t old_hi,
		sector_t new_lo, sector_t new_hi)
		{
		struct bitmap *bitmap = mddev->bitmap;
		sector_t sector, blocks = 0;

		/* Walk the range that fell out of the window at the low end; each call
		 * reports through 'blocks' how far to advance (presumably the number of
		 * sectors covered by the chunk just handled - TODO confirm). */
		for (sector = old_lo; sector < new_lo; ) {
		bitmap_end_sync(bitmap, sector, &blocks, 0);
		sector += blocks;
		}
		/* Only warn when there was a previous low bound (old_lo != 0). */
		WARN((blocks > new_lo) && old_lo, "alignment is not correct for lo\n");

		/* Walk the range newly added to the window at the high end. */
		for (sector = old_hi; sector < new_hi; ) {
		bitmap_start_sync(bitmap, sector, &blocks, 0);
		sector += blocks;
		}
		/* Only warn when there was a previous high bound (old_hi != 0). */
		WARN((blocks > new_hi) && old_hi, "alignment is not correct for hi\n");
		}
		EXPORT_SYMBOL(bitmap_sync_with_cluster);

		static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
		{
		/* For each chunk covered by any of these sectors, set the
		@@ -1814,6 +1848,9 @@ int bitmap_load(struct mddev *mddev)
		if (!bitmap)
		goto out;

		if (mddev_is_clustered(mddev))
		md_cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes);

		/* Clear out old bitmap info first: Either there is none, or we
		* are resuming after someone else has possibly changed things,
		* so we should forget old cached info.
		@@ -1890,14 +1927,14 @@ int bitmap_copy_from_slot(struct mddev *mddev, int slot,

		if (clear_bits) {
		bitmap_update_sb(bitmap);
		/* Setting this for the ev_page should be enough.
		* And we do not require both write_all and PAGE_DIRT either
		*/
		/* BITMAP_PAGE_PENDING is set, but bitmap_unplug needs
		* BITMAP_PAGE_DIRTY or _NEEDWRITE to write ... */
		for (i = 0; i < bitmap->storage.file_pages; i++)
		set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
		bitmap_write_all(bitmap);
		if (test_page_attr(bitmap, i, BITMAP_PAGE_PENDING))
		set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE);
		bitmap_unplug(bitmap);
		}
		bitmap_unplug(mddev->bitmap);
		*low = lo;
		*high = hi;
		err:
		@@ -2032,6 +2069,35 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
		chunks << chunkshift);

		spin_lock_irq(&bitmap->counts.lock);
		/* For cluster raid, need to pre-allocate bitmap */
		if (mddev_is_clustered(bitmap->mddev)) {
		unsigned long page;
		for (page = 0; page < pages; page++) {
		ret = bitmap_checkpage(&bitmap->counts, page, 1, 1);
		if (ret) {
		unsigned long k;

		/* deallocate the page memory */
		for (k = 0; k < page; k++) {
		kfree(new_bp[k].map);
		}

		/* restore some fields from old_counts */
		bitmap->counts.bp = old_counts.bp;
		bitmap->counts.pages = old_counts.pages;
		bitmap->counts.missing_pages = old_counts.pages;
		bitmap->counts.chunkshift = old_counts.chunkshift;
		bitmap->counts.chunks = old_counts.chunks;
		bitmap->mddev->bitmap_info.chunksize = 1 << (old_counts.chunkshift +
		BITMAP_BLOCK_SHIFT);
		blocks = old_counts.chunks << old_counts.chunkshift;
		pr_err("Could not pre-allocate in-memory bitmap for cluster raid\n");
		break;
		} else
		bitmap->counts.bp[page].count += 1;
		}
		}

		for (block = 0; block < blocks; ) {
		bitmap_counter_t bmc_old, bmc_new;
		int set;

drivers/md/bitmap.h

+3 −0

Original line number	Diff line number	Diff line
		@@ -258,6 +258,9 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
		void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted);
		void bitmap_close_sync(struct bitmap *bitmap);
		void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force);
		void bitmap_sync_with_cluster(struct mddev *mddev,
		sector_t old_lo, sector_t old_hi,
		sector_t new_lo, sector_t new_hi);

		void bitmap_unplug(struct bitmap *bitmap);
		void bitmap_daemon_work(struct mddev *mddev);

drivers/md/md-cluster.c

+79 −17

Original line number	Diff line number	Diff line
		@@ -61,6 +61,10 @@ struct resync_info {
		* the lock.
		*/
		#define MD_CLUSTER_SEND_LOCKED_ALREADY 5
		/* We should receive message after node joined cluster and
		* set up all the related infos such as bitmap and personality */
		#define MD_CLUSTER_ALREADY_IN_CLUSTER 6
		#define MD_CLUSTER_PENDING_RECV_EVENT 7


		struct md_cluster_info {
		@@ -85,6 +89,9 @@ struct md_cluster_info {
		struct completion newdisk_completion;
		wait_queue_head_t wait;
		unsigned long state;
		/* record the region in RESYNCING message */
		sector_t sync_low;
		sector_t sync_hi;
		};

		enum msg_type {
		@@ -284,11 +291,14 @@ static void recover_bitmaps(struct md_thread *thread)
		goto dlm_unlock;
		}
		if (hi > 0) {
		/* TODO:Wait for current resync to get over */
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		if (lo < mddev->recovery_cp)
		mddev->recovery_cp = lo;
		md_check_recovery(mddev);
		/* wake up thread to continue resync in case resync
		* is not finished */
		if (mddev->recovery_cp != MaxSector) {
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		md_wakeup_thread(mddev->thread);
		}
		}
		dlm_unlock:
		dlm_unlock_sync(bm_lockres);
		@@ -370,8 +380,12 @@ static void ack_bast(void *arg, int mode)
		struct dlm_lock_resource *res = arg;
		struct md_cluster_info *cinfo = res->mddev->cluster_info;

		if (mode == DLM_LOCK_EX)
		if (mode == DLM_LOCK_EX) {
		if (test_bit(MD_CLUSTER_ALREADY_IN_CLUSTER, &cinfo->state))
		md_wakeup_thread(cinfo->recv_thread);
		else
		set_bit(MD_CLUSTER_PENDING_RECV_EVENT, &cinfo->state);
		}
		}

		static void __remove_suspend_info(struct md_cluster_info *cinfo, int slot)
		@@ -408,6 +422,30 @@ static void process_suspend_info(struct mddev *mddev,
		md_wakeup_thread(mddev->thread);
		return;
		}

		/*
		* The bitmaps are not same for different nodes
		* if RESYNCING is happening in one node, then
		* the node which received the RESYNCING message
		* probably will perform resync with the region
		* [lo, hi] again, so we could reduce resync time
		* a lot if we can ensure that the bitmaps among
		* different nodes are match up well.
		*
		* sync_low/hi is used to record the region which
		* arrived in the previous RESYNCING message,
		*
		* Call bitmap_sync_with_cluster to clear
		* NEEDED_MASK and set RESYNC_MASK since
		* resync thread is running in another node,
		* so we don't need to do the resync again
		* with the same section */
		bitmap_sync_with_cluster(mddev, cinfo->sync_low,
		cinfo->sync_hi,
		lo, hi);
		cinfo->sync_low = lo;
		cinfo->sync_hi = hi;

		s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
		if (!s)
		return;
		@@ -482,11 +520,13 @@ static void process_readd_disk(struct mddev *mddev, struct cluster_msg *msg)
		__func__, __LINE__, le32_to_cpu(msg->raid_slot));
		}

		static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
		static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
		{
		int ret = 0;

		if (WARN(mddev->cluster_info->slot_number - 1 == le32_to_cpu(msg->slot),
		"node %d received it's own msg\n", le32_to_cpu(msg->slot)))
		return;
		return -1;
		switch (le32_to_cpu(msg->type)) {
		case METADATA_UPDATED:
		process_metadata_update(mddev, msg);
		@@ -509,9 +549,11 @@ static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
		__recover_slot(mddev, le32_to_cpu(msg->slot));
		break;
		default:
		ret = -1;
		pr_warn("%s:%d Received unknown message from %d\n",
		__func__, __LINE__, msg->slot);
		}
		return ret;
		}

		/*
		@@ -535,7 +577,9 @@ static void recv_daemon(struct md_thread *thread)

		/* read lvb and wake up thread to process this message_lockres */
		memcpy(&msg, message_lockres->lksb.sb_lvbptr, sizeof(struct cluster_msg));
		process_recvd_msg(thread->mddev, &msg);
		ret = process_recvd_msg(thread->mddev, &msg);
		if (ret)
		goto out;

		/*release CR on ack_lockres*/
		ret = dlm_unlock_sync(ack_lockres);
		@@ -549,6 +593,7 @@ static void recv_daemon(struct md_thread *thread)
		ret = dlm_lock_sync(ack_lockres, DLM_LOCK_CR);
		if (unlikely(ret != 0))
		pr_info("lock CR on ack failed return %d\n", ret);
		out:
		/*release CR on message_lockres*/
		ret = dlm_unlock_sync(message_lockres);
		if (unlikely(ret != 0))
		@@ -778,17 +823,24 @@ static int join(struct mddev *mddev, int nodes)
		cinfo->token_lockres = lockres_init(mddev, "token", NULL, 0);
		if (!cinfo->token_lockres)
		goto err;
		cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0);
		if (!cinfo->ack_lockres)
		goto err;
		cinfo->no_new_dev_lockres = lockres_init(mddev, "no-new-dev", NULL, 0);
		if (!cinfo->no_new_dev_lockres)
		goto err;

		ret = dlm_lock_sync(cinfo->token_lockres, DLM_LOCK_EX);
		if (ret) {
		ret = -EAGAIN;
		pr_err("md-cluster: can't join cluster to avoid lock issue\n");
		goto err;
		}
		cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0);
		if (!cinfo->ack_lockres)
		goto err;
		/* get sync CR lock on ACK. */
		if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR))
		pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n",
		ret);
		dlm_unlock_sync(cinfo->token_lockres);
		/* get sync CR lock on no-new-dev. */
		if (dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR))
		pr_err("md-cluster: failed to get a sync CR lock on no-new-dev!(%d)\n", ret);
		@@ -809,12 +861,10 @@ static int join(struct mddev *mddev, int nodes)
		if (!cinfo->resync_lockres)
		goto err;

		ret = gather_all_resync_info(mddev, nodes);
		if (ret)
		goto err;

		return 0;
		err:
		md_unregister_thread(&cinfo->recovery_thread);
		md_unregister_thread(&cinfo->recv_thread);
		lockres_free(cinfo->message_lockres);
		lockres_free(cinfo->token_lockres);
		lockres_free(cinfo->ack_lockres);
		@@ -828,6 +878,19 @@ static int join(struct mddev *mddev, int nodes)
		return ret;
		}

		/*
		 * load_bitmaps - gather resync info for all slots and mark this node as
		 * fully joined.
		 *
		 * @mddev:       array whose cluster_info state is updated
		 * @total_slots: slot count forwarded to gather_all_resync_info()
		 *
		 * A failure of gather_all_resync_info() is only logged; the function still
		 * sets MD_CLUSTER_ALREADY_IN_CLUSTER afterwards and returns nothing.
		 */
		static void load_bitmaps(struct mddev *mddev, int total_slots)
		{
		struct md_cluster_info *cinfo = mddev->cluster_info;

		/* load all the node's bitmap info for resync */
		if (gather_all_resync_info(mddev, total_slots))
		pr_err("md-cluster: failed to gather all resyn infos\n");
		/* From here on ack_bast() wakes the recv thread directly instead of
		 * setting MD_CLUSTER_PENDING_RECV_EVENT. */
		set_bit(MD_CLUSTER_ALREADY_IN_CLUSTER, &cinfo->state);
		/* wake up recv thread in case something need to be handled */
		/* The pending bit is consumed atomically, so a deferred event triggers
		 * exactly one wakeup here. */
		if (test_and_clear_bit(MD_CLUSTER_PENDING_RECV_EVENT, &cinfo->state))
		md_wakeup_thread(cinfo->recv_thread);
		}

		static void resync_bitmap(struct mddev *mddev)
		{
		struct md_cluster_info *cinfo = mddev->cluster_info;
		@@ -937,7 +1000,6 @@ static void metadata_update_cancel(struct mddev *mddev)
		static int resync_start(struct mddev *mddev)
		{
		struct md_cluster_info *cinfo = mddev->cluster_info;
		cinfo->resync_lockres->flags |= DLM_LKF_NOQUEUE;
		return dlm_lock_sync(cinfo->resync_lockres, DLM_LOCK_EX);
		}

		@@ -967,7 +1029,6 @@ static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
		static int resync_finish(struct mddev *mddev)
		{
		struct md_cluster_info *cinfo = mddev->cluster_info;
		cinfo->resync_lockres->flags &= ~DLM_LKF_NOQUEUE;
		dlm_unlock_sync(cinfo->resync_lockres);
		return resync_info_update(mddev, 0, 0);
		}
		@@ -1171,6 +1232,7 @@ static struct md_cluster_operations cluster_ops = {
		.add_new_disk_cancel = add_new_disk_cancel,
		.new_disk_ack = new_disk_ack,
		.remove_disk = remove_disk,
		.load_bitmaps = load_bitmaps,
		.gather_bitmaps = gather_bitmaps,
		.lock_all_bitmaps = lock_all_bitmaps,
		.unlock_all_bitmaps = unlock_all_bitmaps,

drivers/md/md-cluster.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -23,6 +23,7 @@ struct md_cluster_operations {
		void (*add_new_disk_cancel)(struct mddev *mddev);
		int (*new_disk_ack)(struct mddev *mddev, bool ack);
		int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev);
		void (*load_bitmaps)(struct mddev *mddev, int total_slots);
		int (*gather_bitmaps)(struct md_rdev *rdev);
		int (*lock_all_bitmaps)(struct mddev *mddev);
		void (*unlock_all_bitmaps)(struct mddev *mddev);

Original line number	Diff line number	Diff line
		@@ -23,6 +23,7 @@ struct md_cluster_operations {
		void (*add_new_disk_cancel)(struct mddev *mddev);
		int (*new_disk_ack)(struct mddev *mddev, bool ack);
		int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev);
		void (*load_bitmaps)(struct mddev *mddev, int total_slots);
		int (*gather_bitmaps)(struct md_rdev *rdev);
		int (*lock_all_bitmaps)(struct mddev *mddev);
		void (*unlock_all_bitmaps)(struct mddev *mddev);