Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 223cdea4 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md: (53 commits)
  md/raid5 revise rules for when to update metadata during reshape
  md/raid5: minor code cleanups in make_request.
  md: remove CONFIG_MD_RAID_RESHAPE config option.
  md/raid5: be more careful about write ordering when reshaping.
  md: don't display meaningless values in sysfs files resync_start and sync_speed
  md/raid5: allow layout and chunksize to be changed on active array.
  md/raid5: reshape using largest of old and new chunk size
  md/raid5: prepare for allowing reshape to change layout
  md/raid5: prepare for allowing reshape to change chunksize.
  md/raid5: clearly differentiate 'before' and 'after' stripes during reshape.
  Documentation/md.txt update
  md: allow number of drives in raid5 to be reduced
  md/raid5: change reshape-progress measurement to cope with reshaping backwards.
  md: add explicit method to signal the end of a reshape.
  md/raid5: enhance raid5_size to work correctly with negative delta_disks
  md/raid5: drop qd_idx from r6_state
  md/raid6: move raid6 data processing to raid6_pq.ko
  md: raid5 run(): Fix max_degraded for raid level 4.
  md: 'array_size' sysfs attribute
  md: centralize ->array_sectors modifications
  ...
parents 31e6e2da c8f517c4
Loading
Loading
Loading
Loading
+30 −7
Original line number Diff line number Diff line
@@ -164,15 +164,19 @@ All md devices contain:
  raid_disks
     a text file with a simple number indicating the number of devices
     in a fully functional array.  If this is not yet known, the file
     will be empty.  If an array is being resized (not currently
     possible) this will contain the larger of the old and new sizes.
     Some raid level (RAID1) allow this value to be set while the
     array is active.  This will reconfigure the array.   Otherwise
     it can only be set while assembling an array.
     will be empty.  If an array is being resized this will contain
     the new number of devices.
     Some raid levels allow this value to be set while the array is
     active.  This will reconfigure the array.   Otherwise it can only
     be set while assembling an array.
     A change to this attribute will not be permitted if it would
     reduce the size of the array.  To reduce the number of drives
     in an e.g. raid5, the array size must first be reduced by
     setting the 'array_size' attribute.

  chunk_size
     This is the size if bytes for 'chunks' and is only relevant to
     raid levels that involve striping (1,4,5,6,10). The address space
     This is the size in bytes for 'chunks' and is only relevant to
     raid levels that involve striping (0,4,5,6,10). The address space
     of the array is conceptually divided into chunks and consecutive
     chunks are striped onto neighbouring devices.
     The size should be at least PAGE_SIZE (4k) and should be a power
@@ -183,6 +187,20 @@ All md devices contain:
     simply a number that is interpretted differently by different
     levels.  It can be written while assembling an array.

  array_size
     This can be used to artificially constrain the available space in
     the array to be less than is actually available on the combined
     devices.  Writing a number (in Kilobytes) which is less than
     the available size will set the size.  Any reconfiguration of the
     array (e.g. adding devices) will not cause the size to change.
     Writing the word 'default' will cause the effective size of the
     array to be whatever size is actually available based on
     'level', 'chunk_size' and 'component_size'.

     This can be used to reduce the size of the array before reducing
     the number of devices in a raid4/5/6, or to support external
     metadata formats which mandate such clipping.

  reshape_position
     This is either "none" or a sector number within the devices of
     the array where "reshape" is up to.  If this is set, the three
@@ -207,6 +225,11 @@ All md devices contain:
     about the array.  It can be 0.90 (traditional format), 1.0, 1.1,
     1.2 (newer format in varying locations) or "none" indicating that
     the kernel isn't managing metadata at all.
     Alternately it can be "external:" followed by a string which
     is set by user-space.  This indicates that metadata is managed
     by a user-space program.  Any device failure or other event that
     requires a metadata update will cause array activity to be
     suspended until the event is acknowledged.

  resync_start
     The point at which resync should start.  If no resync is needed,
+1 −1
Original line number Diff line number Diff line
@@ -18,8 +18,8 @@

#define BH_TRACE 0
#include <linux/module.h>
#include <linux/raid/md.h>
#include <linux/raid/xor.h>
#include <linux/jiffies.h>
#include <asm/xor.h>

/* The xor routines to use.  */
+3 −28
Original line number Diff line number Diff line
@@ -121,6 +121,7 @@ config MD_RAID10
config MD_RAID456
	tristate "RAID-4/RAID-5/RAID-6 mode"
	depends on BLK_DEV_MD
	select MD_RAID6_PQ
	select ASYNC_MEMCPY
	select ASYNC_XOR
	---help---
@@ -151,34 +152,8 @@ config MD_RAID456

	  If unsure, say Y.

config MD_RAID5_RESHAPE
	bool "Support adding drives to a raid-5 array"
	depends on MD_RAID456
	default y
	---help---
	  A RAID-5 set can be expanded by adding extra drives. This
	  requires "restriping" the array which means (almost) every
	  block must be written to a different place.

          This option allows such restriping to be done while the array
	  is online.

	  You will need mdadm version 2.4.1 or later to use this
	  feature safely.  During the early stage of reshape there is
	  a critical section where live data is being over-written.  A
	  crash during this time needs extra care for recovery.  The
	  newer mdadm takes a copy of the data in the critical section
	  and will restore it, if necessary, after a crash.

	  The mdadm usage is e.g.
	       mdadm --grow /dev/md1 --raid-disks=6
	  to grow '/dev/md1' to having 6 disks.

	  Note: The array can only be expanded, not contracted.
	  There should be enough spares already present to make the new
	  array workable.

	  If unsure, say Y.
config MD_RAID6_PQ
	tristate

config MD_MULTIPATH
	tristate "Multipath I/O support"
+9 −7
Original line number Diff line number Diff line
@@ -2,20 +2,21 @@
# Makefile for the kernel software RAID and LVM drivers.
#

dm-mod-objs	:= dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
dm-mod-y	+= dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
		   dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o
dm-multipath-objs := dm-path-selector.o dm-mpath.o
dm-snapshot-objs := dm-snap.o dm-exception-store.o dm-snap-transient.o \
dm-multipath-y	+= dm-path-selector.o dm-mpath.o
dm-snapshot-y	+= dm-snap.o dm-exception-store.o dm-snap-transient.o \
		    dm-snap-persistent.o
dm-mirror-objs	:= dm-raid1.o
md-mod-objs     := md.o bitmap.o
raid456-objs	:= raid5.o raid6algos.o raid6recov.o raid6tables.o \
dm-mirror-y	+= dm-raid1.o
md-mod-y	+= md.o bitmap.o
raid456-y	+= raid5.o
raid6_pq-y	+= raid6algos.o raid6recov.o raid6tables.o \
		   raid6int1.o raid6int2.o raid6int4.o \
		   raid6int8.o raid6int16.o raid6int32.o \
		   raid6altivec1.o raid6altivec2.o raid6altivec4.o \
		   raid6altivec8.o \
		   raid6mmx.o raid6sse1.o raid6sse2.o
hostprogs-y	:= mktables
hostprogs-y	+= mktables

# Note: link order is important.  All raid personalities
# and must come before md.o, as they each initialise 
@@ -26,6 +27,7 @@ obj-$(CONFIG_MD_LINEAR) += linear.o
obj-$(CONFIG_MD_RAID0)		+= raid0.o
obj-$(CONFIG_MD_RAID1)		+= raid1.o
obj-$(CONFIG_MD_RAID10)		+= raid10.o
obj-$(CONFIG_MD_RAID6_PQ)	+= raid6_pq.o
obj-$(CONFIG_MD_RAID456)	+= raid456.o
obj-$(CONFIG_MD_MULTIPATH)	+= multipath.o
obj-$(CONFIG_MD_FAULTY)		+= faulty.o
+39 −10
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@
 * wait if count gets too high, wake when it drops to half.
 */

#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
@@ -26,8 +27,8 @@
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/buffer_head.h>
#include <linux/raid/md.h>
#include <linux/raid/bitmap.h>
#include "md.h"
#include "bitmap.h"

/* debug macros */

@@ -111,9 +112,10 @@ static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int creat
	unsigned char *mappage;

	if (page >= bitmap->pages) {
		printk(KERN_ALERT
			"%s: invalid bitmap page request: %lu (> %lu)\n",
			bmname(bitmap), page, bitmap->pages-1);
		/* This can happen if bitmap_start_sync goes beyond
		 * End-of-device while looking for a whole page.
		 * It is harmless.
		 */
		return -EINVAL;
	}

@@ -265,7 +267,6 @@ static mdk_rdev_t *next_active_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
	list_for_each_continue_rcu(pos, &mddev->disks) {
		rdev = list_entry(pos, mdk_rdev_t, same_set);
		if (rdev->raid_disk >= 0 &&
		    test_bit(In_sync, &rdev->flags) &&
		    !test_bit(Faulty, &rdev->flags)) {
			/* this is a usable devices */
			atomic_inc(&rdev->nr_pending);
@@ -297,7 +298,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
				    + size/512 > 0)
					/* bitmap runs in to metadata */
					goto bad_alignment;
				if (rdev->data_offset + mddev->size*2
				if (rdev->data_offset + mddev->dev_sectors
				    > rdev->sb_start + bitmap->offset)
					/* data runs in to bitmap */
					goto bad_alignment;
@@ -570,7 +571,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
	else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
		 le32_to_cpu(sb->version) > BITMAP_MAJOR_HI)
		reason = "unrecognized superblock version";
	else if (chunksize < PAGE_SIZE)
	else if (chunksize < 512)
		reason = "bitmap chunksize too small";
	else if ((1 << ffz(~chunksize)) != chunksize)
		reason = "bitmap chunksize not a power of 2";
@@ -1306,6 +1307,9 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
		PRINTK(KERN_DEBUG "dec write-behind count %d/%d\n",
		  atomic_read(&bitmap->behind_writes), bitmap->max_write_behind);
	}
	if (bitmap->mddev->degraded)
		/* Never clear bits or update events_cleared when degraded */
		success = 0;

	while (sectors) {
		int blocks;
@@ -1345,7 +1349,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
	}
}

int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
			       int degraded)
{
	bitmap_counter_t *bmc;
@@ -1374,6 +1378,29 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
	return rv;
}

int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
		      int degraded)
{
	/* bitmap_start_sync must always report on multiples of whole
	 * pages, otherwise resync (which is very PAGE_SIZE based) will
	 * get confused.
	 * So call __bitmap_start_sync repeatedly (if needed) until
	 * At least PAGE_SIZE>>9 blocks are covered.
	 * Return the 'or' of the result.
	 */
	int rv = 0;
	int blocks1;

	*blocks = 0;
	while (*blocks < (PAGE_SIZE>>9)) {
		rv |= __bitmap_start_sync(bitmap, offset,
					  &blocks1, degraded);
		offset += blocks1;
		*blocks += blocks1;
	}
	return rv;
}

void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted)
{
	bitmap_counter_t *bmc;
@@ -1443,6 +1470,8 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
	wait_event(bitmap->mddev->recovery_wait,
		   atomic_read(&bitmap->mddev->recovery_active) == 0);

	bitmap->mddev->curr_resync_completed = bitmap->mddev->curr_resync;
	set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
	sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1);
	s = 0;
	while (s < sector && s < bitmap->mddev->resync_max_sectors) {
Loading