Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2a4c32ed authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull MD updates from Shaohua Li:

 - a raid5 writeback cache feature.

   The goal is to aggregate writes to make full stripe write and reduce
   read-modify-write. It's helpful for workload which does sequential
   write and follows fsync for example. This feature is experimental and
   off by default right now.

 - FAILFAST support.

   This fails IOs to broken raid disks quickly, so can improve latency.
   It's mainly for DASD storage, but some patches help normal raid array
   too.

 - support bad block for raid array with external metadata

 - AVX2 instruction support for raid6 parity calculation

 - normalize MD info output

 - add missing blktrace

 - other bug fixes

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md: (66 commits)
  md: separate flags for superblock changes
  md: MD_RECOVERY_NEEDED is set for mddev->recovery
  md: takeover should clear unrelated bits
  md/r5cache: after recovery, increase journal seq by 10000
  md/raid5-cache: fix crc in rewrite_data_only_stripes()
  md/raid5-cache: no recovery is required when create super-block
  md: fix refcount problem on mddev when stopping array.
  md/r5cache: do r5c_update_log_state after log recovery
  md/raid5-cache: adjust the write position of the empty block if no data blocks
  md/r5cache: run_no_space_stripes() when R5C_LOG_CRITICAL == 0
  md/raid5: limit request size according to implementation limits
  md/raid5-cache: do not need to set STRIPE_PREREAD_ACTIVE repeatedly
  md/raid5-cache: remove the unnecessary next_cp_seq field from the r5l_log
  md/raid5-cache: release the stripe_head at the appropriate location
  md/raid5-cache: use ring add to prevent overflow
  md/raid5-cache: remove unnecessary function parameters
  raid5-cache: don't set STRIPE_R5C_PARTIAL_STRIPE flag while load stripe into cache
  raid5-cache: add another check conditon before replaying one stripe
  md/r5cache: enable IRQs on error path
  md/r5cache: handle alloc_page failure
  ...
parents b9f98bd4 20737738
Loading
Loading
Loading
Loading
+98 −68
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@
#include <linux/mount.h>
#include <linux/buffer_head.h>
#include <linux/seq_file.h>
#include <trace/events/block.h>
#include "md.h"
#include "bitmap.h"

@@ -208,11 +209,13 @@ static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mdde

static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
{
	struct md_rdev *rdev = NULL;
	struct md_rdev *rdev;
	struct block_device *bdev;
	struct mddev *mddev = bitmap->mddev;
	struct bitmap_storage *store = &bitmap->storage;

restart:
	rdev = NULL;
	while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
		int size = PAGE_SIZE;
		loff_t offset = mddev->bitmap_info.offset;
@@ -268,8 +271,8 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
			       page);
	}

	if (wait)
		md_super_wait(mddev);
	if (wait && md_super_wait(mddev) < 0)
		goto restart;
	return 0;

 bad_alignment:
@@ -405,7 +408,7 @@ static int read_page(struct file *file, unsigned long index,
		ret = -EIO;
out:
	if (ret)
		printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %d\n",
		pr_err("md: bitmap read error: (%dB @ %llu): %d\n",
		       (int)PAGE_SIZE,
		       (unsigned long long)index << PAGE_SHIFT,
		       ret);
@@ -416,6 +419,28 @@ static int read_page(struct file *file, unsigned long index,
 * bitmap file superblock operations
 */

/*
 * bitmap_wait_writes() should be called before writing any bitmap
 * blocks, to ensure previous writes, particularly from
 * bitmap_daemon_work(), have completed.
 */
static void bitmap_wait_writes(struct bitmap *bitmap)
{
	if (bitmap->storage.file)
		wait_event(bitmap->write_wait,
			   atomic_read(&bitmap->pending_writes)==0);
	else
		/* Note that we ignore the return value.  The writes
		 * might have failed, but that would just mean that
		 * some bits which should be cleared haven't been,
		 * which is safe.  The relevant bitmap blocks will
		 * probably get written again, but there is no great
		 * loss if they aren't.
		 */
		md_super_wait(bitmap->mddev);
}


/* update the event counter and sync the superblock to disk */
void bitmap_update_sb(struct bitmap *bitmap)
{
@@ -455,24 +480,24 @@ void bitmap_print_sb(struct bitmap *bitmap)
	if (!bitmap || !bitmap->storage.sb_page)
		return;
	sb = kmap_atomic(bitmap->storage.sb_page);
	printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap));
	printk(KERN_DEBUG "         magic: %08x\n", le32_to_cpu(sb->magic));
	printk(KERN_DEBUG "       version: %d\n", le32_to_cpu(sb->version));
	printk(KERN_DEBUG "          uuid: %08x.%08x.%08x.%08x\n",
	pr_debug("%s: bitmap file superblock:\n", bmname(bitmap));
	pr_debug("         magic: %08x\n", le32_to_cpu(sb->magic));
	pr_debug("       version: %d\n", le32_to_cpu(sb->version));
	pr_debug("          uuid: %08x.%08x.%08x.%08x\n",
		 *(__u32 *)(sb->uuid+0),
		 *(__u32 *)(sb->uuid+4),
		 *(__u32 *)(sb->uuid+8),
		 *(__u32 *)(sb->uuid+12));
	printk(KERN_DEBUG "        events: %llu\n",
	pr_debug("        events: %llu\n",
		 (unsigned long long) le64_to_cpu(sb->events));
	printk(KERN_DEBUG "events cleared: %llu\n",
	pr_debug("events cleared: %llu\n",
		 (unsigned long long) le64_to_cpu(sb->events_cleared));
	printk(KERN_DEBUG "         state: %08x\n", le32_to_cpu(sb->state));
	printk(KERN_DEBUG "     chunksize: %d B\n", le32_to_cpu(sb->chunksize));
	printk(KERN_DEBUG "  daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
	printk(KERN_DEBUG "     sync size: %llu KB\n",
	pr_debug("         state: %08x\n", le32_to_cpu(sb->state));
	pr_debug("     chunksize: %d B\n", le32_to_cpu(sb->chunksize));
	pr_debug("  daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
	pr_debug("     sync size: %llu KB\n",
		 (unsigned long long)le64_to_cpu(sb->sync_size)/2);
	printk(KERN_DEBUG "max write behind: %d\n", le32_to_cpu(sb->write_behind));
	pr_debug("max write behind: %d\n", le32_to_cpu(sb->write_behind));
	kunmap_atomic(sb);
}

@@ -506,14 +531,14 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap)
	BUG_ON(!chunksize);
	if (!is_power_of_2(chunksize)) {
		kunmap_atomic(sb);
		printk(KERN_ERR "bitmap chunksize not a power of 2\n");
		pr_warn("bitmap chunksize not a power of 2\n");
		return -EINVAL;
	}
	sb->chunksize = cpu_to_le32(chunksize);

	daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
	if (!daemon_sleep || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
		printk(KERN_INFO "Choosing daemon_sleep default (5 sec)\n");
		pr_debug("Choosing daemon_sleep default (5 sec)\n");
		daemon_sleep = 5 * HZ;
	}
	sb->daemon_sleep = cpu_to_le32(daemon_sleep);
@@ -584,7 +609,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
		/* to 4k blocks */
		bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096);
		offset = bitmap->mddev->bitmap_info.offset + (bitmap->cluster_slot * (bm_blocks << 3));
		pr_info("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
		pr_debug("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
			bitmap->cluster_slot, offset);
	}

@@ -634,7 +659,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
	else if (write_behind > COUNTER_MAX)
		reason = "write-behind limit out of range (0 - 16383)";
	if (reason) {
		printk(KERN_INFO "%s: invalid bitmap file superblock: %s\n",
		pr_warn("%s: invalid bitmap file superblock: %s\n",
			bmname(bitmap), reason);
		goto out;
	}
@@ -648,16 +673,13 @@ static int bitmap_read_sb(struct bitmap *bitmap)
		 * bitmap's UUID and event counter to the mddev's
		 */
		if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
			printk(KERN_INFO
			       "%s: bitmap superblock UUID mismatch\n",
			pr_warn("%s: bitmap superblock UUID mismatch\n",
				bmname(bitmap));
			goto out;
		}
		events = le64_to_cpu(sb->events);
		if (!nodes && (events < bitmap->mddev->events)) {
			printk(KERN_INFO
			       "%s: bitmap file is out of date (%llu < %llu) "
			       "-- forcing full recovery\n",
			pr_warn("%s: bitmap file is out of date (%llu < %llu) -- forcing full recovery\n",
				bmname(bitmap), events,
				(unsigned long long) bitmap->mddev->events);
			set_bit(BITMAP_STALE, &bitmap->flags);
@@ -679,7 +701,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
	if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
		err = md_setup_cluster(bitmap->mddev, nodes);
		if (err) {
			pr_err("%s: Could not setup cluster service (%d)\n",
			pr_warn("%s: Could not setup cluster service (%d)\n",
				bmname(bitmap), err);
			goto out_no_sb;
		}
@@ -847,14 +869,12 @@ static void bitmap_file_kick(struct bitmap *bitmap)
				ptr = file_path(bitmap->storage.file,
					     path, PAGE_SIZE);

			printk(KERN_ALERT
			      "%s: kicking failed bitmap file %s from array!\n",
			pr_warn("%s: kicking failed bitmap file %s from array!\n",
				bmname(bitmap), IS_ERR(ptr) ? "" : ptr);

			kfree(path);
		} else
			printk(KERN_ALERT
			       "%s: disabling internal bitmap due to errors\n",
			pr_warn("%s: disabling internal bitmap due to errors\n",
				bmname(bitmap));
	}
}
@@ -983,6 +1003,7 @@ void bitmap_unplug(struct bitmap *bitmap)
{
	unsigned long i;
	int dirty, need_write;
	int writing = 0;

	if (!bitmap || !bitmap->storage.filemap ||
	    test_bit(BITMAP_STALE, &bitmap->flags))
@@ -997,15 +1018,19 @@ void bitmap_unplug(struct bitmap *bitmap)
		need_write = test_and_clear_page_attr(bitmap, i,
						      BITMAP_PAGE_NEEDWRITE);
		if (dirty || need_write) {
			if (!writing) {
				bitmap_wait_writes(bitmap);
				if (bitmap->mddev->queue)
					blk_add_trace_msg(bitmap->mddev->queue,
							  "md bitmap_unplug");
			}
			clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
			write_page(bitmap, bitmap->storage.filemap[i], 0);
			writing = 1;
		}
	}
	if (bitmap->storage.file)
		wait_event(bitmap->write_wait,
			   atomic_read(&bitmap->pending_writes)==0);
	else
		md_super_wait(bitmap->mddev);
	if (writing)
		bitmap_wait_writes(bitmap);

	if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
		bitmap_file_kick(bitmap);
@@ -1056,11 +1081,10 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)

	outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
	if (outofdate)
		printk(KERN_INFO "%s: bitmap file is out of date, doing full "
			"recovery\n", bmname(bitmap));
		pr_warn("%s: bitmap file is out of date, doing full recovery\n", bmname(bitmap));

	if (file && i_size_read(file->f_mapping->host) < store->bytes) {
		printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
		pr_warn("%s: bitmap file too short %lu < %lu\n",
			bmname(bitmap),
			(unsigned long) i_size_read(file->f_mapping->host),
			store->bytes);
@@ -1137,15 +1161,14 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
		offset = 0;
	}

	printk(KERN_INFO "%s: bitmap initialized from disk: "
	       "read %lu pages, set %lu of %lu bits\n",
	pr_debug("%s: bitmap initialized from disk: read %lu pages, set %lu of %lu bits\n",
		 bmname(bitmap), store->file_pages,
		 bit_cnt, chunks);

	return 0;

 err:
	printk(KERN_INFO "%s: bitmap initialisation failed: %d\n",
	pr_warn("%s: bitmap initialisation failed: %d\n",
		bmname(bitmap), ret);
	return ret;
}
@@ -1225,6 +1248,10 @@ void bitmap_daemon_work(struct mddev *mddev)
	}
	bitmap->allclean = 1;

	if (bitmap->mddev->queue)
		blk_add_trace_msg(bitmap->mddev->queue,
				  "md bitmap_daemon_work");

	/* Any file-page which is PENDING now needs to be written.
	 * So set NEEDWRITE now, then after we make any last-minute changes
	 * we will write it.
@@ -1289,6 +1316,7 @@ void bitmap_daemon_work(struct mddev *mddev)
	}
	spin_unlock_irq(&counts->lock);

	bitmap_wait_writes(bitmap);
	/* Now start writeout on any page in NEEDWRITE that isn't DIRTY.
	 * DIRTY pages need to be written by bitmap_unplug so it can wait
	 * for them.
@@ -1595,7 +1623,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force)
		   atomic_read(&bitmap->mddev->recovery_active) == 0);

	bitmap->mddev->curr_resync_completed = sector;
	set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
	set_bit(MD_SB_CHANGE_CLEAN, &bitmap->mddev->sb_flags);
	sector &= ~((1ULL << bitmap->counts.chunkshift) - 1);
	s = 0;
	while (s < sector && s < bitmap->mddev->resync_max_sectors) {
@@ -1825,7 +1853,7 @@ struct bitmap *bitmap_create(struct mddev *mddev, int slot)
	if (err)
		goto error;

	printk(KERN_INFO "created bitmap (%lu pages) for device %s\n",
	pr_debug("created bitmap (%lu pages) for device %s\n",
		 bitmap->counts.pages, bmname(bitmap));

	err = test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0;
@@ -2029,8 +2057,10 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
					   !bitmap->mddev->bitmap_info.external,
					   mddev_is_clustered(bitmap->mddev)
					   ? bitmap->cluster_slot : 0);
	if (ret)
	if (ret) {
		bitmap_file_unmap(&store);
		goto err;
	}

	pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO);

@@ -2089,7 +2119,7 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
				bitmap->mddev->bitmap_info.chunksize = 1 << (old_counts.chunkshift +
									     BITMAP_BLOCK_SHIFT);
				blocks = old_counts.chunks << old_counts.chunkshift;
				pr_err("Could not pre-allocate in-memory bitmap for cluster raid\n");
				pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n");
				break;
			} else
				bitmap->counts.bp[page].count += 1;
@@ -2266,7 +2296,7 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
		/* Ensure new bitmap info is stored in
		 * metadata promptly.
		 */
		set_bit(MD_CHANGE_DEVS, &mddev->flags);
		set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
		md_wakeup_thread(mddev->thread);
	}
	rv = 0;
+2 −2
Original line number Diff line number Diff line
@@ -2011,7 +2011,7 @@ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev)
		sb->compat_features = cpu_to_le32(FEATURE_FLAG_SUPPORTS_V190);

		/* Force writing of superblocks to disk */
		set_bit(MD_CHANGE_DEVS, &rdev->mddev->flags);
		set_bit(MD_SB_CHANGE_DEVS, &rdev->mddev->sb_flags);

		/* Any superblock is better than none, choose that if given */
		return refdev ? 0 : 1;
@@ -3497,7 +3497,7 @@ static void rs_update_sbs(struct raid_set *rs)
	struct mddev *mddev = &rs->md;
	int ro = mddev->ro;

	set_bit(MD_CHANGE_DEVS, &mddev->flags);
	set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
	mddev->ro = 0;
	md_update_sb(mddev, 1);
	mddev->ro = ro;
+17 −14
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@
#include <linux/seq_file.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <trace/events/block.h>
#include "md.h"
#include "linear.h"

@@ -101,7 +102,7 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
		sector_t sectors;

		if (j < 0 || j >= raid_disks || disk->rdev) {
			printk(KERN_ERR "md/linear:%s: disk numbering problem. Aborting!\n",
			pr_warn("md/linear:%s: disk numbering problem. Aborting!\n",
				mdname(mddev));
			goto out;
		}
@@ -123,7 +124,7 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
			discard_supported = true;
	}
	if (cnt != raid_disks) {
		printk(KERN_ERR "md/linear:%s: not enough drives present. Aborting!\n",
		pr_warn("md/linear:%s: not enough drives present. Aborting!\n",
			mdname(mddev));
		goto out;
	}
@@ -227,22 +228,22 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
	}

	do {
		tmp_dev = which_dev(mddev, bio->bi_iter.bi_sector);
		sector_t bio_sector = bio->bi_iter.bi_sector;
		tmp_dev = which_dev(mddev, bio_sector);
		start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
		end_sector = tmp_dev->end_sector;
		data_offset = tmp_dev->rdev->data_offset;
		bio->bi_bdev = tmp_dev->rdev->bdev;

		if (unlikely(bio->bi_iter.bi_sector >= end_sector ||
			     bio->bi_iter.bi_sector < start_sector))
		if (unlikely(bio_sector >= end_sector ||
			     bio_sector < start_sector))
			goto out_of_bounds;

		if (unlikely(bio_end_sector(bio) > end_sector)) {
			/* This bio crosses a device boundary, so we have to
			 * split it.
			 */
			split = bio_split(bio, end_sector -
					  bio->bi_iter.bi_sector,
			split = bio_split(bio, end_sector - bio_sector,
					  GFP_NOIO, fs_bio_set);
			bio_chain(split, bio);
		} else {
@@ -256,15 +257,18 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
			 !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) {
			/* Just ignore it */
			bio_endio(split);
		} else
		} else {
			if (mddev->gendisk)
				trace_block_bio_remap(bdev_get_queue(split->bi_bdev),
						      split, disk_devt(mddev->gendisk),
						      bio_sector);
			generic_make_request(split);
		}
	} while (split != bio);
	return;

out_of_bounds:
	printk(KERN_ERR
	       "md/linear:%s: make_request: Sector %llu out of bounds on "
	       "dev %s: %llu sectors, offset %llu\n",
	pr_err("md/linear:%s: make_request: Sector %llu out of bounds on dev %s: %llu sectors, offset %llu\n",
	       mdname(mddev),
	       (unsigned long long)bio->bi_iter.bi_sector,
	       bdevname(tmp_dev->rdev->bdev, b),
@@ -275,7 +279,6 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)

static void linear_status (struct seq_file *seq, struct mddev *mddev)
{

	seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
}

+364 −337

File changed.

Preview size limit exceeded, changes collapsed.

+67 −41
Original line number Diff line number Diff line
@@ -29,6 +29,16 @@

#define MaxSector (~(sector_t)0)

/*
 * These flags should really be called "NO_RETRY" rather than
 * "FAILFAST" because they don't make any promise about time lapse,
 * only about the number of retries, which will be zero.
 * REQ_FAILFAST_DRIVER is not included because
 * Commit: 4a27446f3e39 ("[SCSI] modify scsi to handle new fail fast flags.")
 * seems to suggest that the errors it avoids retrying should usually
 * be retried.
 */
#define	MD_FAILFAST	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT)
/*
 * MD's 'extended' device
 */
@@ -168,6 +178,19 @@ enum flag_bits {
				 * so it is safe to remove without
				 * another synchronize_rcu() call.
				 */
	ExternalBbl,            /* External metadata provides bad
				 * block management for a disk
				 */
	FailFast,		/* Minimal retries should be attempted on
				 * this device, so use REQ_FAILFAST_DEV.
				 * Also don't try to repair failed reads.
				 * It is expects that no bad block log
				 * is present.
				 */
	LastDev,		/* Seems to be the last working dev as
				 * it didn't fail, so don't use FailFast
				 * any more for metadata
				 */
};

static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
@@ -189,6 +212,31 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
				int is_new);
struct md_cluster_info;

enum mddev_flags {
	MD_ARRAY_FIRST_USE,	/* First use of array, needs initialization */
	MD_CLOSING,		/* If set, we are closing the array, do not open
				 * it then */
	MD_JOURNAL_CLEAN,	/* A raid with journal is already clean */
	MD_HAS_JOURNAL,		/* The raid array has journal feature set */
	MD_RELOAD_SB,		/* Reload the superblock because another node
				 * updated it.
				 */
	MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only, which means node
				   * already took resync lock, need to
				   * release the lock */
	MD_FAILFAST_SUPPORTED,	/* Using MD_FAILFAST on metadata writes is
				 * supported as calls to md_error() will
				 * never cause the array to become failed.
				 */
};

enum mddev_sb_flags {
	MD_SB_CHANGE_DEVS,		/* Some device status has changed */
	MD_SB_CHANGE_CLEAN,	/* transition to or from 'clean' */
	MD_SB_CHANGE_PENDING,	/* switch from 'clean' to 'active' in progress */
	MD_SB_NEED_REWRITE,	/* metadata write needs to be repeated */
};

struct mddev {
	void				*private;
	struct md_personality		*pers;
@@ -196,21 +244,7 @@ struct mddev {
	int				md_minor;
	struct list_head		disks;
	unsigned long			flags;
#define MD_CHANGE_DEVS	0	/* Some device status has changed */
#define MD_CHANGE_CLEAN 1	/* transition to or from 'clean' */
#define MD_CHANGE_PENDING 2	/* switch from 'clean' to 'active' in progress */
#define MD_UPDATE_SB_FLAGS (1 | 2 | 4)	/* If these are set, md_update_sb needed */
#define MD_ARRAY_FIRST_USE 3    /* First use of array, needs initialization */
#define MD_CLOSING	4	/* If set, we are closing the array, do not open
				 * it then */
#define MD_JOURNAL_CLEAN 5	/* A raid with journal is already clean */
#define MD_HAS_JOURNAL	6	/* The raid array has journal feature set */
#define MD_RELOAD_SB	7	/* Reload the superblock because another node
				 * updated it.
				 */
#define MD_CLUSTER_RESYNC_LOCKED 8 /* cluster raid only, which means node
				    * already took resync lock, need to
				    * release the lock */
	unsigned long			sb_flags;

	int				suspended;
	atomic_t			active_io;
@@ -304,31 +338,6 @@ struct mddev {
	int				parallel_resync;

	int				ok_start_degraded;
	/* recovery/resync flags
	 * NEEDED:   we might need to start a resync/recover
	 * RUNNING:  a thread is running, or about to be started
	 * SYNC:     actually doing a resync, not a recovery
	 * RECOVER:  doing recovery, or need to try it.
	 * INTR:     resync needs to be aborted for some reason
	 * DONE:     thread is done and is waiting to be reaped
	 * REQUEST:  user-space has requested a sync (used with SYNC)
	 * CHECK:    user-space request for check-only, no repair
	 * RESHAPE:  A reshape is happening
	 * ERROR:    sync-action interrupted because io-error
	 *
	 * If neither SYNC or RESHAPE are set, then it is a recovery.
	 */
#define	MD_RECOVERY_RUNNING	0
#define	MD_RECOVERY_SYNC	1
#define	MD_RECOVERY_RECOVER	2
#define	MD_RECOVERY_INTR	3
#define	MD_RECOVERY_DONE	4
#define	MD_RECOVERY_NEEDED	5
#define	MD_RECOVERY_REQUESTED	6
#define	MD_RECOVERY_CHECK	7
#define MD_RECOVERY_RESHAPE	8
#define	MD_RECOVERY_FROZEN	9
#define	MD_RECOVERY_ERROR	10

	unsigned long			recovery;
	/* If a RAID personality determines that recovery (of a particular
@@ -442,6 +451,23 @@ struct mddev {
	unsigned int			good_device_nr;	/* good device num within cluster raid */
};

enum recovery_flags {
	/*
	 * If neither SYNC or RESHAPE are set, then it is a recovery.
	 */
	MD_RECOVERY_RUNNING,	/* a thread is running, or about to be started */
	MD_RECOVERY_SYNC,	/* actually doing a resync, not a recovery */
	MD_RECOVERY_RECOVER,	/* doing recovery, or need to try it. */
	MD_RECOVERY_INTR,	/* resync needs to be aborted for some reason */
	MD_RECOVERY_DONE,	/* thread is done and is waiting to be reaped */
	MD_RECOVERY_NEEDED,	/* we might need to start a resync/recover */
	MD_RECOVERY_REQUESTED,	/* user-space has requested a sync (used with SYNC) */
	MD_RECOVERY_CHECK,	/* user-space request for check-only, no repair */
	MD_RECOVERY_RESHAPE,	/* A reshape is happening */
	MD_RECOVERY_FROZEN,	/* User request to abort, and not restart, any action */
	MD_RECOVERY_ERROR,	/* sync-action interrupted because io-error */
};

static inline int __must_check mddev_lock(struct mddev *mddev)
{
	return mutex_lock_interruptible(&mddev->reconfig_mutex);
@@ -623,7 +649,7 @@ extern int mddev_congested(struct mddev *mddev, int bits);
extern void md_flush_request(struct mddev *mddev, struct bio *bio);
extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
			   sector_t sector, int size, struct page *page);
extern void md_super_wait(struct mddev *mddev);
extern int md_super_wait(struct mddev *mddev);
extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
			struct page *page, int op, int op_flags,
			bool metadata_op);
Loading