
Commit 4b382d06 authored by Linus Torvalds

Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md:
  md: allow resync_start to be set while an array is active.
  md/raid10:  reformat some loops with less indenting.
  md/raid10: remove unused variable.
  md/raid10: make more use of 'slot' in raid10d.
  md/raid10: some tidying up in fix_read_error
  md/raid1: improve handling of pages allocated for write-behind.
  md/raid1: try fix_sync_read_error before process_checks.
  md/raid1: tidy up new functions: process_checks and fix_sync_read_error.
  md/raid1: split out two sub-functions from sync_request_write
  md: make error_handler functions more uniform and correct.
  md/multipath: discard ->working_disks in favour of ->degraded
  md/raid1: clean up read_balance.
  md: simplify raid10 read_balance
  md/bitmap: fix saving of events_cleared and other state.
  md: reject a re-add request that cannot be honoured.
  md: Fix race when creating a new md device.
parents bdfbe804 b098636c
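
For the first change in this series ("md: allow resync_start to be set while an array is active"), resync_start_store() now accepts a write while the array is running as long as MD_RECOVERY_FROZEN is set (see the md.c hunk below). A minimal userspace sketch of how that might be exercised follows; it is not part of the commit, and the device name (md0) and the checkpoint value written are assumptions for illustration only.

/*
 * Hedged usage sketch: freeze recovery, rewrite the resync checkpoint
 * on the active array, then unfreeze.  Device name and value are
 * illustrative assumptions, not taken from this commit.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_attr(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, val, strlen(val)) < 0) {
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	/* Set MD_RECOVERY_FROZEN so resync_start_store() no longer returns -EBUSY. */
	if (write_attr("/sys/block/md0/md/sync_action", "frozen"))
		perror("freeze");
	/* Rewrite the resync checkpoint while the array stays active. */
	if (write_attr("/sys/block/md0/md/resync_start", "0"))
		perror("resync_start");
	/* Clear the frozen state; any needed resync resumes from the new checkpoint. */
	if (write_attr("/sys/block/md0/md/sync_action", "idle"))
		perror("unfreeze");
	return 0;
}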
drivers/md/bitmap.c +6 −4
@@ -493,11 +493,11 @@ void bitmap_update_sb(struct bitmap *bitmap)
	spin_unlock_irqrestore(&bitmap->lock, flags);
	sb = kmap_atomic(bitmap->sb_page, KM_USER0);
	sb->events = cpu_to_le64(bitmap->mddev->events);
	if (bitmap->mddev->events < bitmap->events_cleared) {
	if (bitmap->mddev->events < bitmap->events_cleared)
		/* rocking back to read-only */
		bitmap->events_cleared = bitmap->mddev->events;
	sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
	}
	sb->state = cpu_to_le32(bitmap->flags);
	/* Just in case these have been changed via sysfs: */
	sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
	sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
@@ -618,7 +618,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
	if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
		bitmap->flags |= BITMAP_HOSTENDIAN;
	bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
	if (sb->state & cpu_to_le32(BITMAP_STALE))
	if (bitmap->flags & BITMAP_STALE)
		bitmap->events_cleared = bitmap->mddev->events;
	err = 0;
out:
@@ -652,9 +652,11 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
	switch (op) {
	case MASK_SET:
		sb->state |= cpu_to_le32(bits);
		bitmap->flags |= bits;
		break;
	case MASK_UNSET:
		sb->state &= cpu_to_le32(~bits);
		bitmap->flags &= ~bits;
		break;
	default:
		BUG();
drivers/md/md.c +19 −4
@@ -3324,7 +3324,7 @@ resync_start_store(mddev_t *mddev, const char *buf, size_t len)
	char *e;
	unsigned long long n = simple_strtoull(buf, &e, 10);

	if (mddev->pers)
	if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
		return -EBUSY;
	if (cmd_match(buf, "none"))
		n = MaxSector;
@@ -4347,13 +4347,19 @@ static int md_alloc(dev_t dev, char *name)
	disk->fops = &md_fops;
	disk->private_data = mddev;
	disk->queue = mddev->queue;
	blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
	/* Allow extended partitions.  This makes the
	 * 'mdp' device redundant, but we can't really
	 * remove it now.
	 */
	disk->flags |= GENHD_FL_EXT_DEVT;
	add_disk(disk);
	mddev->gendisk = disk;
	/* As soon as we call add_disk(), another thread could get
	 * through to md_open, so make sure it doesn't get too far
	 */
	mutex_lock(&mddev->open_mutex);
	add_disk(disk);

	error = kobject_init_and_add(&mddev->kobj, &md_ktype,
				     &disk_to_dev(disk)->kobj, "%s", "md");
	if (error) {
@@ -4367,8 +4373,7 @@ static int md_alloc(dev_t dev, char *name)
	if (mddev->kobj.sd &&
	    sysfs_create_group(&mddev->kobj, &md_bitmap_group))
		printk(KERN_DEBUG "pointless warning\n");

	blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
	mutex_unlock(&mddev->open_mutex);
 abort:
	mutex_unlock(&disks_mutex);
	if (!error && mddev->kobj.sd) {
@@ -5211,6 +5216,16 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
		} else
			super_types[mddev->major_version].
				validate_super(mddev, rdev);
		if ((info->state & (1<<MD_DISK_SYNC)) &&
		    (!test_bit(In_sync, &rdev->flags) ||
		     rdev->raid_disk != info->raid_disk)) {
			/* This was a hot-add request, but events doesn't
			 * match, so reject it.
			 */
			export_rdev(rdev);
			return -EINVAL;
		}

		if (test_bit(In_sync, &rdev->flags))
			rdev->saved_raid_disk = rdev->raid_disk;
		else
drivers/md/multipath.c +32 −28
@@ -146,7 +146,7 @@ static void multipath_status (struct seq_file *seq, mddev_t *mddev)
	int i;
	
	seq_printf (seq, " [%d/%d] [", conf->raid_disks,
						 conf->working_disks);
		    conf->raid_disks - mddev->degraded);
	for (i = 0; i < conf->raid_disks; i++)
		seq_printf (seq, "%s",
			       conf->multipaths[i].rdev && 
@@ -186,8 +186,9 @@ static int multipath_congested(void *data, int bits)
static void multipath_error (mddev_t *mddev, mdk_rdev_t *rdev)
{
	multipath_conf_t *conf = mddev->private;
	char b[BDEVNAME_SIZE];

	if (conf->working_disks <= 1) {
	if (conf->raid_disks - mddev->degraded <= 1) {
		/*
		 * Uh oh, we can do nothing if this is our last path, but
		 * first check if this is a queued request for a device
@@ -196,25 +197,25 @@ static void multipath_error (mddev_t *mddev, mdk_rdev_t *rdev)
		printk(KERN_ALERT 
		       "multipath: only one IO path left and IO error.\n");
		/* leave it active... it's all we have */
	} else {
		return;
	}
	/*
	 * Mark disk as unusable
	 */
		if (!test_bit(Faulty, &rdev->flags)) {
			char b[BDEVNAME_SIZE];
			clear_bit(In_sync, &rdev->flags);
	if (test_and_clear_bit(In_sync, &rdev->flags)) {
		unsigned long flags;
		spin_lock_irqsave(&conf->device_lock, flags);
		mddev->degraded++;
		spin_unlock_irqrestore(&conf->device_lock, flags);
	}
	set_bit(Faulty, &rdev->flags);
	set_bit(MD_CHANGE_DEVS, &mddev->flags);
			conf->working_disks--;
			mddev->degraded++;
	printk(KERN_ALERT "multipath: IO failure on %s,"
	       " disabling IO path.\n"
	       "multipath: Operation continuing"
	       " on %d IO paths.\n",
	       bdevname(rdev->bdev, b),
				conf->working_disks);
		}
	}
	       conf->raid_disks - mddev->degraded);
}

static void print_multipath_conf (multipath_conf_t *conf)
@@ -227,7 +228,7 @@ static void print_multipath_conf (multipath_conf_t *conf)
		printk("(conf==NULL)\n");
		return;
	}
	printk(" --- wd:%d rd:%d\n", conf->working_disks,
	printk(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
			 conf->raid_disks);

	for (i = 0; i < conf->raid_disks; i++) {
@@ -274,10 +275,11 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
							   PAGE_CACHE_SIZE - 1);
			}

			conf->working_disks++;
			spin_lock_irq(&conf->device_lock);
			mddev->degraded--;
			rdev->raid_disk = path;
			set_bit(In_sync, &rdev->flags);
			spin_unlock_irq(&conf->device_lock);
			rcu_assign_pointer(p->rdev, rdev);
			err = 0;
			md_integrity_add_rdev(rdev, mddev);
@@ -391,6 +393,7 @@ static int multipath_run (mddev_t *mddev)
	int disk_idx;
	struct multipath_info *disk;
	mdk_rdev_t *rdev;
	int working_disks;

	if (md_check_no_bitmap(mddev))
		return -EINVAL;
@@ -424,7 +427,7 @@ static int multipath_run (mddev_t *mddev)
		goto out_free_conf;
	}

	conf->working_disks = 0;
	working_disks = 0;
	list_for_each_entry(rdev, &mddev->disks, same_set) {
		disk_idx = rdev->raid_disk;
		if (disk_idx < 0 ||
@@ -446,7 +449,7 @@ static int multipath_run (mddev_t *mddev)
		}

		if (!test_bit(Faulty, &rdev->flags))
			conf->working_disks++;
			working_disks++;
	}

	conf->raid_disks = mddev->raid_disks;
@@ -454,12 +457,12 @@ static int multipath_run (mddev_t *mddev)
	spin_lock_init(&conf->device_lock);
	INIT_LIST_HEAD(&conf->retry_list);

	if (!conf->working_disks) {
	if (!working_disks) {
		printk(KERN_ERR "multipath: no operational IO paths for %s\n",
			mdname(mddev));
		goto out_free_conf;
	}
	mddev->degraded = conf->raid_disks - conf->working_disks;
	mddev->degraded = conf->raid_disks - working_disks;

	conf->pool = mempool_create_kmalloc_pool(NR_RESERVED_BUFS,
						 sizeof(struct multipath_bh));
@@ -481,7 +484,8 @@ static int multipath_run (mddev_t *mddev)

	printk(KERN_INFO 
		"multipath: array %s active with %d out of %d IO paths\n",
		mdname(mddev), conf->working_disks, mddev->raid_disks);
		mdname(mddev), conf->raid_disks - mddev->degraded,
	       mddev->raid_disks);
	/*
	 * Ok, everything is just fine now
	 */
drivers/md/multipath.h +0 −1
@@ -9,7 +9,6 @@ struct multipath_private_data {
	mddev_t			*mddev;
	struct multipath_info	*multipaths;
	int			raid_disks;
	int			working_disks;
	spinlock_t		device_lock;
	struct list_head	retry_list;

drivers/md/raid1.c +252 −254
@@ -297,23 +297,24 @@ static void raid1_end_read_request(struct bio *bio, int error)
	rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
}

static void r1_bio_write_done(r1bio_t *r1_bio, int vcnt, struct bio_vec *bv,
			      int behind)
static void r1_bio_write_done(r1bio_t *r1_bio)
{
	if (atomic_dec_and_test(&r1_bio->remaining))
	{
		/* it really is the end of this request */
		if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
			/* free extra copy of the data pages */
			int i = vcnt;
			int i = r1_bio->behind_page_count;
			while (i--)
				safe_put_page(bv[i].bv_page);
				safe_put_page(r1_bio->behind_pages[i]);
			kfree(r1_bio->behind_pages);
			r1_bio->behind_pages = NULL;
		}
		/* clear the bitmap if all writes complete successfully */
		bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
				r1_bio->sectors,
				!test_bit(R1BIO_Degraded, &r1_bio->state),
				behind);
				test_bit(R1BIO_BehindIO, &r1_bio->state));
		md_write_end(r1_bio->mddev);
		raid_end_bio_io(r1_bio);
	}
@@ -386,7 +387,7 @@ static void raid1_end_write_request(struct bio *bio, int error)
	 * Let's see if all mirrored write operations have finished
	 * already.
	 */
	r1_bio_write_done(r1_bio, bio->bi_vcnt, bio->bi_io_vec, behind);
	r1_bio_write_done(r1_bio);

	if (to_put)
		bio_put(to_put);
@@ -411,10 +412,10 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
{
	const sector_t this_sector = r1_bio->sector;
	const int sectors = r1_bio->sectors;
	int new_disk = -1;
	int start_disk;
	int best_disk;
	int i;
	sector_t new_distance, current_distance;
	sector_t best_dist;
	mdk_rdev_t *rdev;
	int choose_first;

@@ -425,6 +426,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
	 * We take the first readable disk when above the resync window.
	 */
 retry:
	best_disk = -1;
	best_dist = MaxSector;
	if (conf->mddev->recovery_cp < MaxSector &&
	    (this_sector + sectors >= conf->next_resync)) {
		choose_first = 1;
@@ -434,8 +437,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
		start_disk = conf->last_used;
	}

	/* make sure the disk is operational */
	for (i = 0 ; i < conf->raid_disks ; i++) {
		sector_t dist;
		int disk = start_disk + i;
		if (disk >= conf->raid_disks)
			disk -= conf->raid_disks;
@@ -443,60 +446,43 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
		rdev = rcu_dereference(conf->mirrors[disk].rdev);
		if (r1_bio->bios[disk] == IO_BLOCKED
		    || rdev == NULL
		    || !test_bit(In_sync, &rdev->flags))
		    || test_bit(Faulty, &rdev->flags))
			continue;
		if (!test_bit(In_sync, &rdev->flags) &&
		    rdev->recovery_offset < this_sector + sectors)
			continue;
		if (test_bit(WriteMostly, &rdev->flags)) {
			/* Don't balance among write-mostly, just
			 * use the first as a last resort */
			if (best_disk < 0)
				best_disk = disk;
			continue;

		new_disk = disk;
		if (!test_bit(WriteMostly, &rdev->flags))
			break;
		}

	if (new_disk < 0 || choose_first)
		goto rb_out;

	/*
	 * Don't change to another disk for sequential reads:
		/* This is a reasonable device to use.  It might
		 * even be best.
		 */
	if (conf->next_seq_sect == this_sector)
		goto rb_out;
	if (this_sector == conf->mirrors[new_disk].head_position)
		goto rb_out;

	current_distance = abs(this_sector 
			       - conf->mirrors[new_disk].head_position);

	/* look for a better disk - i.e. head is closer */
	start_disk = new_disk;
	for (i = 1; i < conf->raid_disks; i++) {
		int disk = start_disk + 1;
		if (disk >= conf->raid_disks)
			disk -= conf->raid_disks;

		rdev = rcu_dereference(conf->mirrors[disk].rdev);
		if (r1_bio->bios[disk] == IO_BLOCKED
		    || rdev == NULL
		    || !test_bit(In_sync, &rdev->flags)
		    || test_bit(WriteMostly, &rdev->flags))
			continue;

		if (!atomic_read(&rdev->nr_pending)) {
			new_disk = disk;
		dist = abs(this_sector - conf->mirrors[disk].head_position);
		if (choose_first
		    /* Don't change to another disk for sequential reads */
		    || conf->next_seq_sect == this_sector
		    || dist == 0
		    /* If device is idle, use it */
		    || atomic_read(&rdev->nr_pending) == 0) {
			best_disk = disk;
			break;
		}
		new_distance = abs(this_sector - conf->mirrors[disk].head_position);
		if (new_distance < current_distance) {
			current_distance = new_distance;
			new_disk = disk;
		if (dist < best_dist) {
			best_dist = dist;
			best_disk = disk;
		}
	}

 rb_out:
	if (new_disk >= 0) {
		rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
	if (best_disk >= 0) {
		rdev = rcu_dereference(conf->mirrors[best_disk].rdev);
		if (!rdev)
			goto retry;
		atomic_inc(&rdev->nr_pending);
		if (!test_bit(In_sync, &rdev->flags)) {
		if (test_bit(Faulty, &rdev->flags)) {
			/* cannot risk returning a device that failed
			 * before we inc'ed nr_pending
			 */
@@ -504,11 +490,11 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
			goto retry;
		}
		conf->next_seq_sect = this_sector + sectors;
		conf->last_used = new_disk;
		conf->last_used = best_disk;
	}
	rcu_read_unlock();

	return new_disk;
	return best_disk;
}

static int raid1_congested(void *data, int bits)
@@ -675,37 +661,36 @@ static void unfreeze_array(conf_t *conf)


/* duplicate the data pages for behind I/O 
 * We return a list of bio_vec rather than just page pointers
 * as it makes freeing easier
 */
static struct bio_vec *alloc_behind_pages(struct bio *bio)
static void alloc_behind_pages(struct bio *bio, r1bio_t *r1_bio)
{
	int i;
	struct bio_vec *bvec;
	struct bio_vec *pages = kzalloc(bio->bi_vcnt * sizeof(struct bio_vec),
	struct page **pages = kzalloc(bio->bi_vcnt * sizeof(struct page*),
					GFP_NOIO);
	if (unlikely(!pages))
		goto do_sync_io;
		return;

	bio_for_each_segment(bvec, bio, i) {
		pages[i].bv_page = alloc_page(GFP_NOIO);
		if (unlikely(!pages[i].bv_page))
		pages[i] = alloc_page(GFP_NOIO);
		if (unlikely(!pages[i]))
			goto do_sync_io;
		memcpy(kmap(pages[i].bv_page) + bvec->bv_offset,
		memcpy(kmap(pages[i]) + bvec->bv_offset,
			kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len);
		kunmap(pages[i].bv_page);
		kunmap(pages[i]);
		kunmap(bvec->bv_page);
	}

	return pages;
	r1_bio->behind_pages = pages;
	r1_bio->behind_page_count = bio->bi_vcnt;
	set_bit(R1BIO_BehindIO, &r1_bio->state);
	return;

do_sync_io:
	if (pages)
		for (i = 0; i < bio->bi_vcnt && pages[i].bv_page; i++)
			put_page(pages[i].bv_page);
	for (i = 0; i < bio->bi_vcnt; i++)
		if (pages[i])
			put_page(pages[i]);
	kfree(pages);
	PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
	return NULL;
}

static int make_request(mddev_t *mddev, struct bio * bio)
@@ -717,7 +702,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
	int i, targets = 0, disks;
	struct bitmap *bitmap;
	unsigned long flags;
	struct bio_vec *behind_pages = NULL;
	const int rw = bio_data_dir(bio);
	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
	const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
@@ -870,9 +854,8 @@ static int make_request(mddev_t *mddev, struct bio * bio)
	if (bitmap &&
	    (atomic_read(&bitmap->behind_writes)
	     < mddev->bitmap_info.max_write_behind) &&
	    !waitqueue_active(&bitmap->behind_wait) &&
	    (behind_pages = alloc_behind_pages(bio)) != NULL)
		set_bit(R1BIO_BehindIO, &r1_bio->state);
	    !waitqueue_active(&bitmap->behind_wait))
		alloc_behind_pages(bio, r1_bio);

	atomic_set(&r1_bio->remaining, 1);
	atomic_set(&r1_bio->behind_remaining, 0);
@@ -893,7 +876,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
		mbio->bi_rw = WRITE | do_flush_fua | do_sync;
		mbio->bi_private = r1_bio;

		if (behind_pages) {
		if (r1_bio->behind_pages) {
			struct bio_vec *bvec;
			int j;

@@ -905,7 +888,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
			 * them all
			 */
			__bio_for_each_segment(bvec, mbio, j, 0)
				bvec->bv_page = behind_pages[j].bv_page;
				bvec->bv_page = r1_bio->behind_pages[j];
			if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
				atomic_inc(&r1_bio->behind_remaining);
		}
@@ -915,8 +898,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
		bio_list_add(&conf->pending_bio_list, mbio);
		spin_unlock_irqrestore(&conf->device_lock, flags);
	}
	r1_bio_write_done(r1_bio, bio->bi_vcnt, behind_pages, behind_pages != NULL);
	kfree(behind_pages); /* the behind pages are attached to the bios now */
	r1_bio_write_done(r1_bio);

	/* In case raid1d snuck in to freeze_array */
	wake_up(&conf->wait_barrier);
@@ -1196,101 +1178,9 @@ static void end_sync_write(struct bio *bio, int error)
	}
}

static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
static int fix_sync_read_error(r1bio_t *r1_bio)
{
	conf_t *conf = mddev->private;
	int i;
	int disks = conf->raid_disks;
	struct bio *bio, *wbio;

	bio = r1_bio->bios[r1_bio->read_disk];


	if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
		/* We have read all readable devices.  If we haven't
		 * got the block, then there is no hope left.
		 * If we have, then we want to do a comparison
		 * and skip the write if everything is the same.
		 * If any blocks failed to read, then we need to
		 * attempt an over-write
		 */
		int primary;
		if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
			for (i=0; i<mddev->raid_disks; i++)
				if (r1_bio->bios[i]->bi_end_io == end_sync_read)
					md_error(mddev, conf->mirrors[i].rdev);

			md_done_sync(mddev, r1_bio->sectors, 1);
			put_buf(r1_bio);
			return;
		}
		for (primary=0; primary<mddev->raid_disks; primary++)
			if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
			    test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
				r1_bio->bios[primary]->bi_end_io = NULL;
				rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
				break;
			}
		r1_bio->read_disk = primary;
		for (i=0; i<mddev->raid_disks; i++)
			if (r1_bio->bios[i]->bi_end_io == end_sync_read) {
				int j;
				int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
				struct bio *pbio = r1_bio->bios[primary];
				struct bio *sbio = r1_bio->bios[i];

				if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
					for (j = vcnt; j-- ; ) {
						struct page *p, *s;
						p = pbio->bi_io_vec[j].bv_page;
						s = sbio->bi_io_vec[j].bv_page;
						if (memcmp(page_address(p),
							   page_address(s),
							   PAGE_SIZE))
							break;
					}
				} else
					j = 0;
				if (j >= 0)
					mddev->resync_mismatches += r1_bio->sectors;
				if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
					      && test_bit(BIO_UPTODATE, &sbio->bi_flags))) {
					sbio->bi_end_io = NULL;
					rdev_dec_pending(conf->mirrors[i].rdev, mddev);
				} else {
					/* fixup the bio for reuse */
					int size;
					sbio->bi_vcnt = vcnt;
					sbio->bi_size = r1_bio->sectors << 9;
					sbio->bi_idx = 0;
					sbio->bi_phys_segments = 0;
					sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
					sbio->bi_flags |= 1 << BIO_UPTODATE;
					sbio->bi_next = NULL;
					sbio->bi_sector = r1_bio->sector +
						conf->mirrors[i].rdev->data_offset;
					sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
					size = sbio->bi_size;
					for (j = 0; j < vcnt ; j++) {
						struct bio_vec *bi;
						bi = &sbio->bi_io_vec[j];
						bi->bv_offset = 0;
						if (size > PAGE_SIZE)
							bi->bv_len = PAGE_SIZE;
						else
							bi->bv_len = size;
						size -= PAGE_SIZE;
						memcpy(page_address(bi->bv_page),
						       page_address(pbio->bi_io_vec[j].bv_page),
						       PAGE_SIZE);
					}

				}
			}
	}
	if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
		/* ouch - failed to read all of that.
		 * Try some synchronous reads of other devices to get
	/* Try some synchronous reads of other devices to get
	 * good data, much like with normal read errors.  Only
	 * read into the pages we already have so we don't
	 * need to re-issue the read request.
@@ -1298,6 +1188,9 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
	 * active sync request, there is no normal IO, and
	 * no overlapping syncs.
	 */
	mddev_t *mddev = r1_bio->mddev;
	conf_t *conf = mddev->private;
	struct bio *bio = r1_bio->bios[r1_bio->read_disk];
	sector_t sect = r1_bio->sector;
	int sectors = r1_bio->sectors;
	int idx = 0;
@@ -1307,6 +1200,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
		int d = r1_bio->read_disk;
		int success = 0;
		mdk_rdev_t *rdev;
		int start;

		if (s > (PAGE_SIZE>>9))
			s = PAGE_SIZE >> 9;
@@ -1331,10 +1225,22 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
				d = 0;
		} while (!success && d != r1_bio->read_disk);

			if (success) {
				int start = d;
		if (!success) {
			char b[BDEVNAME_SIZE];
			/* Cannot read from anywhere, array is toast */
			md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
			printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error"
			       " for block %llu\n",
			       mdname(mddev),
			       bdevname(bio->bi_bdev, b),
			       (unsigned long long)r1_bio->sector);
			md_done_sync(mddev, r1_bio->sectors, 0);
			put_buf(r1_bio);
			return 0;
		}

		start = d;
		/* write it back and re-read */
				set_bit(R1BIO_Uptodate, &r1_bio->state);
		while (d != r1_bio->read_disk) {
			if (d == 0)
				d = conf->raid_disks;
@@ -1342,13 +1248,16 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
			if (r1_bio->bios[d]->bi_end_io != end_sync_read)
				continue;
			rdev = conf->mirrors[d].rdev;
					atomic_add(s, &rdev->corrected_errors);
			if (sync_page_io(rdev,
					 sect,
					 s<<9,
					 bio->bi_io_vec[idx].bv_page,
							 WRITE, false) == 0)
					 WRITE, false) == 0) {
				r1_bio->bios[d]->bi_end_io = NULL;
				rdev_dec_pending(rdev, mddev);
				md_error(mddev, rdev);
			} else
				atomic_add(s, &rdev->corrected_errors);
		}
		d = start;
		while (d != r1_bio->read_disk) {
@@ -1365,25 +1274,114 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
					 READ, false) == 0)
				md_error(mddev, rdev);
		}
			} else {
				char b[BDEVNAME_SIZE];
				/* Cannot read from anywhere, array is toast */
				md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
				printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error"
				       " for block %llu\n",
				       mdname(mddev),
				       bdevname(bio->bi_bdev, b),
				       (unsigned long long)r1_bio->sector);
				md_done_sync(mddev, r1_bio->sectors, 0);
				put_buf(r1_bio);
				return;
			}
		sectors -= s;
		sect += s;
		idx ++;
	}
	set_bit(R1BIO_Uptodate, &r1_bio->state);
	set_bit(BIO_UPTODATE, &bio->bi_flags);
	return 1;
}

static int process_checks(r1bio_t *r1_bio)
{
	/* We have read all readable devices.  If we haven't
	 * got the block, then there is no hope left.
	 * If we have, then we want to do a comparison
	 * and skip the write if everything is the same.
	 * If any blocks failed to read, then we need to
	 * attempt an over-write
	 */
	mddev_t *mddev = r1_bio->mddev;
	conf_t *conf = mddev->private;
	int primary;
	int i;

	for (primary = 0; primary < conf->raid_disks; primary++)
		if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
		    test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
			r1_bio->bios[primary]->bi_end_io = NULL;
			rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
			break;
		}
	r1_bio->read_disk = primary;
	for (i = 0; i < conf->raid_disks; i++) {
		int j;
		int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
		struct bio *pbio = r1_bio->bios[primary];
		struct bio *sbio = r1_bio->bios[i];
		int size;

		if (r1_bio->bios[i]->bi_end_io != end_sync_read)
			continue;

		if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
			for (j = vcnt; j-- ; ) {
				struct page *p, *s;
				p = pbio->bi_io_vec[j].bv_page;
				s = sbio->bi_io_vec[j].bv_page;
				if (memcmp(page_address(p),
					   page_address(s),
					   PAGE_SIZE))
					break;
			}
		} else
			j = 0;
		if (j >= 0)
			mddev->resync_mismatches += r1_bio->sectors;
		if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
			      && test_bit(BIO_UPTODATE, &sbio->bi_flags))) {
			/* No need to write to this device. */
			sbio->bi_end_io = NULL;
			rdev_dec_pending(conf->mirrors[i].rdev, mddev);
			continue;
		}
		/* fixup the bio for reuse */
		sbio->bi_vcnt = vcnt;
		sbio->bi_size = r1_bio->sectors << 9;
		sbio->bi_idx = 0;
		sbio->bi_phys_segments = 0;
		sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
		sbio->bi_flags |= 1 << BIO_UPTODATE;
		sbio->bi_next = NULL;
		sbio->bi_sector = r1_bio->sector +
			conf->mirrors[i].rdev->data_offset;
		sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
		size = sbio->bi_size;
		for (j = 0; j < vcnt ; j++) {
			struct bio_vec *bi;
			bi = &sbio->bi_io_vec[j];
			bi->bv_offset = 0;
			if (size > PAGE_SIZE)
				bi->bv_len = PAGE_SIZE;
			else
				bi->bv_len = size;
			size -= PAGE_SIZE;
			memcpy(page_address(bi->bv_page),
			       page_address(pbio->bi_io_vec[j].bv_page),
			       PAGE_SIZE);
		}
	}
	return 0;
}

static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
{
	conf_t *conf = mddev->private;
	int i;
	int disks = conf->raid_disks;
	struct bio *bio, *wbio;

	bio = r1_bio->bios[r1_bio->read_disk];

	if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
		/* ouch - failed to read all of that. */
		if (!fix_sync_read_error(r1_bio))
			return;

	if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
		if (process_checks(r1_bio) < 0)
			return;
	/*
	 * schedule writes
	 */
@@ -2063,7 +2061,7 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
	set_capacity(mddev->gendisk, mddev->array_sectors);
	revalidate_disk(mddev->gendisk);
	if (sectors > mddev->dev_sectors &&
	    mddev->recovery_cp == MaxSector) {
	    mddev->recovery_cp > mddev->dev_sectors) {
		mddev->recovery_cp = mddev->dev_sectors;
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	}