Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ee7fee0b authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md:
  md: remove rd%d links immediately after stopping an array.
  md: remove ability to explicit set an inactive array to 'clean'.
  md: constify VFTs
  md: tidy up status_resync to handle large arrays.
  md: fix some (more) errors with bitmaps on devices larger than 2TB.
  md/raid10: don't clear bitmap during recovery if array will still be degraded.
  md: fix loading of out-of-date bitmap.
parents 8a0a9bd4 c4647292
Loading
Loading
Loading
Loading
+15 −14
Original line number Original line Diff line number Diff line
@@ -986,6 +986,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
			oldindex = index;
			oldindex = index;
			oldpage = page;
			oldpage = page;


			bitmap->filemap[bitmap->file_pages++] = page;
			bitmap->last_page_size = count;

			if (outofdate) {
			if (outofdate) {
				/*
				/*
				 * if bitmap is out of date, dirty the
				 * if bitmap is out of date, dirty the
@@ -998,16 +1001,10 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
				write_page(bitmap, page, 1);
				write_page(bitmap, page, 1);


				ret = -EIO;
				ret = -EIO;
				if (bitmap->flags & BITMAP_WRITE_ERROR) {
				if (bitmap->flags & BITMAP_WRITE_ERROR)
					/* release, page not in filemap yet */
					put_page(page);
					goto err;
					goto err;
			}
			}
		}
		}

			bitmap->filemap[bitmap->file_pages++] = page;
			bitmap->last_page_size = count;
		}
		paddr = kmap_atomic(page, KM_USER0);
		paddr = kmap_atomic(page, KM_USER0);
		if (bitmap->flags & BITMAP_HOSTENDIAN)
		if (bitmap->flags & BITMAP_HOSTENDIAN)
			b = test_bit(bit, paddr);
			b = test_bit(bit, paddr);
@@ -1016,9 +1013,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
		kunmap_atomic(paddr, KM_USER0);
		kunmap_atomic(paddr, KM_USER0);
		if (b) {
		if (b) {
			/* if the disk bit is set, set the memory bit */
			/* if the disk bit is set, set the memory bit */
			bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap),
			int needed = ((sector_t)(i+1) << (CHUNK_BLOCK_SHIFT(bitmap))
					       ((i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) >= start)
				      >= start);
				);
			bitmap_set_memory_bits(bitmap,
					       (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap),
					       needed);
			bit_cnt++;
			bit_cnt++;
			set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
			set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
		}
		}
@@ -1154,7 +1153,8 @@ void bitmap_daemon_work(struct bitmap *bitmap)
			spin_lock_irqsave(&bitmap->lock, flags);
			spin_lock_irqsave(&bitmap->lock, flags);
			clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
			clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
		}
		}
		bmc = bitmap_get_counter(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap),
		bmc = bitmap_get_counter(bitmap,
					 (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
					 &blocks, 0);
					 &blocks, 0);
		if (bmc) {
		if (bmc) {
/*
/*
@@ -1169,7 +1169,8 @@ void bitmap_daemon_work(struct bitmap *bitmap)
			} else if (*bmc == 1) {
			} else if (*bmc == 1) {
				/* we can clear the bit */
				/* we can clear the bit */
				*bmc = 0;
				*bmc = 0;
				bitmap_count_page(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap),
				bitmap_count_page(bitmap,
						  (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
						  -1);
						  -1);


				/* clear the bit */
				/* clear the bit */
@@ -1514,7 +1515,7 @@ void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
	unsigned long chunk;
	unsigned long chunk;


	for (chunk = s; chunk <= e; chunk++) {
	for (chunk = s; chunk <= e; chunk++) {
		sector_t sec = chunk << CHUNK_BLOCK_SHIFT(bitmap);
		sector_t sec = (sector_t)chunk << CHUNK_BLOCK_SHIFT(bitmap);
		bitmap_set_memory_bits(bitmap, sec, 1);
		bitmap_set_memory_bits(bitmap, sec, 1);
		bitmap_file_set_bit(bitmap, sec);
		bitmap_file_set_bit(bitmap, sec);
	}
	}
+39 −31
Original line number Original line Diff line number Diff line
@@ -3066,11 +3066,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
			} else
			} else
				err = -EBUSY;
				err = -EBUSY;
			spin_unlock_irq(&mddev->write_lock);
			spin_unlock_irq(&mddev->write_lock);
		} else {
		} else
			mddev->ro = 0;
			err = -EINVAL;
			mddev->recovery_cp = MaxSector;
			err = do_md_run(mddev);
		}
		break;
		break;
	case active:
	case active:
		if (mddev->pers) {
		if (mddev->pers) {
@@ -4297,6 +4294,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
{
{
	int err = 0;
	int err = 0;
	struct gendisk *disk = mddev->gendisk;
	struct gendisk *disk = mddev->gendisk;
	mdk_rdev_t *rdev;


	if (atomic_read(&mddev->openers) > is_open) {
	if (atomic_read(&mddev->openers) > is_open) {
		printk("md: %s still in use.\n",mdname(mddev));
		printk("md: %s still in use.\n",mdname(mddev));
@@ -4339,6 +4337,13 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
			/* tell userspace to handle 'inactive' */
			/* tell userspace to handle 'inactive' */
			sysfs_notify_dirent(mddev->sysfs_state);
			sysfs_notify_dirent(mddev->sysfs_state);


			list_for_each_entry(rdev, &mddev->disks, same_set)
				if (rdev->raid_disk >= 0) {
					char nm[20];
					sprintf(nm, "rd%d", rdev->raid_disk);
					sysfs_remove_link(&mddev->kobj, nm);
				}

			set_capacity(disk, 0);
			set_capacity(disk, 0);
			mddev->changed = 1;
			mddev->changed = 1;


@@ -4359,7 +4364,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
	 * Free resources if final stop
	 * Free resources if final stop
	 */
	 */
	if (mode == 0) {
	if (mode == 0) {
		mdk_rdev_t *rdev;


		printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
		printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));


@@ -4371,13 +4375,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
		}
		}
		mddev->bitmap_offset = 0;
		mddev->bitmap_offset = 0;


		list_for_each_entry(rdev, &mddev->disks, same_set)
			if (rdev->raid_disk >= 0) {
				char nm[20];
				sprintf(nm, "rd%d", rdev->raid_disk);
				sysfs_remove_link(&mddev->kobj, nm);
			}

		/* make sure all md_delayed_delete calls have finished */
		/* make sure all md_delayed_delete calls have finished */
		flush_scheduled_work();
		flush_scheduled_work();


@@ -5705,37 +5702,38 @@ static void status_unused(struct seq_file *seq)


static void status_resync(struct seq_file *seq, mddev_t * mddev)
static void status_resync(struct seq_file *seq, mddev_t * mddev)
{
{
	sector_t max_blocks, resync, res;
	sector_t max_sectors, resync, res;
	unsigned long dt, db, rt;
	unsigned long dt, db;
	sector_t rt;
	int scale;
	int scale;
	unsigned int per_milli;
	unsigned int per_milli;


	resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2;
	resync = mddev->curr_resync - atomic_read(&mddev->recovery_active);


	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
		max_blocks = mddev->resync_max_sectors >> 1;
		max_sectors = mddev->resync_max_sectors;
	else
	else
		max_blocks = mddev->dev_sectors / 2;
		max_sectors = mddev->dev_sectors;


	/*
	/*
	 * Should not happen.
	 * Should not happen.
	 */
	 */
	if (!max_blocks) {
	if (!max_sectors) {
		MD_BUG();
		MD_BUG();
		return;
		return;
	}
	}
	/* Pick 'scale' such that (resync>>scale)*1000 will fit
	/* Pick 'scale' such that (resync>>scale)*1000 will fit
	 * in a sector_t, and (max_blocks>>scale) will fit in a
	 * in a sector_t, and (max_sectors>>scale) will fit in a
	 * u32, as those are the requirements for sector_div.
	 * u32, as those are the requirements for sector_div.
	 * Thus 'scale' must be at least 10
	 * Thus 'scale' must be at least 10
	 */
	 */
	scale = 10;
	scale = 10;
	if (sizeof(sector_t) > sizeof(unsigned long)) {
	if (sizeof(sector_t) > sizeof(unsigned long)) {
		while ( max_blocks/2 > (1ULL<<(scale+32)))
		while ( max_sectors/2 > (1ULL<<(scale+32)))
			scale++;
			scale++;
	}
	}
	res = (resync>>scale)*1000;
	res = (resync>>scale)*1000;
	sector_div(res, (u32)((max_blocks>>scale)+1));
	sector_div(res, (u32)((max_sectors>>scale)+1));


	per_milli = res;
	per_milli = res;
	{
	{
@@ -5756,25 +5754,35 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev)
		     (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
		     (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
		      "resync" : "recovery"))),
		      "resync" : "recovery"))),
		   per_milli/10, per_milli % 10,
		   per_milli/10, per_milli % 10,
		   (unsigned long long) resync,
		   (unsigned long long) resync/2,
		   (unsigned long long) max_blocks);
		   (unsigned long long) max_sectors/2);


	/*
	/*
	 * We do not want to overflow, so the order of operands and
	 * the * 100 / 100 trick are important. We do a +1 to be
	 * safe against division by zero. We only estimate anyway.
	 *
	 * dt: time from mark until now
	 * dt: time from mark until now
	 * db: blocks written from mark until now
	 * db: blocks written from mark until now
	 * rt: remaining time
	 * rt: remaining time
	 *
	 * rt is a sector_t, so could be 32bit or 64bit.
	 * So we divide before multiply in case it is 32bit and close
	 * to the limit.
	 * We scale the divisor (db) by 32 to avoid losing precision
	 * near the end of resync when the number of remaining sectors
	 * is close to 'db'.
	 * We then divide rt by 32 after multiplying by db to compensate.
	 * The '+1' avoids division by zero if db is very small.
	 */
	 */
	dt = ((jiffies - mddev->resync_mark) / HZ);
	dt = ((jiffies - mddev->resync_mark) / HZ);
	if (!dt) dt++;
	if (!dt) dt++;
	db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
	db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
		- mddev->resync_mark_cnt;
		- mddev->resync_mark_cnt;
	rt = (dt * ((unsigned long)(max_blocks-resync) / (db/2/100+1)))/100;


	seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6);
	rt = max_sectors - resync;    /* number of remaining sectors */
	sector_div(rt, db/32+1);
	rt *= dt;
	rt >>= 5;

	seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
		   ((unsigned long)rt % 60)/6);


	seq_printf(seq, " speed=%ldK/sec", db/2/dt);
	seq_printf(seq, " speed=%ldK/sec", db/2/dt);
}
}
@@ -5965,7 +5973,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
	return 0;
	return 0;
}
}


static struct seq_operations md_seq_ops = {
static const struct seq_operations md_seq_ops = {
	.start  = md_seq_start,
	.start  = md_seq_start,
	.next   = md_seq_next,
	.next   = md_seq_next,
	.stop   = md_seq_stop,
	.stop   = md_seq_stop,
+6 −6
Original line number Original line Diff line number Diff line
@@ -1809,17 +1809,17 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
				r10_bio->sector = sect;
				r10_bio->sector = sect;


				raid10_find_phys(conf, r10_bio);
				raid10_find_phys(conf, r10_bio);
				/* Need to check if this section will still be

				/* Need to check if the array will still be
				 * degraded
				 * degraded
				 */
				 */
				for (j=0; j<conf->copies;j++) {
				for (j=0; j<conf->raid_disks; j++)
					int d = r10_bio->devs[j].devnum;
					if (conf->mirrors[j].rdev == NULL ||
					if (conf->mirrors[d].rdev == NULL ||
					    test_bit(Faulty, &conf->mirrors[j].rdev->flags)) {
					    test_bit(Faulty, &conf->mirrors[d].rdev->flags)) {
						still_degraded = 1;
						still_degraded = 1;
						break;
						break;
					}
					}
				}

				must_sync = bitmap_start_sync(mddev->bitmap, sect,
				must_sync = bitmap_start_sync(mddev->bitmap, sect,
							      &sync_blocks, still_degraded);
							      &sync_blocks, still_degraded);