Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ece5cff0 authored by NeilBrown
Browse files

md: Support write-intent bitmaps with externally managed metadata.



In this case, the metadata must not be stored in the same
sector as the bitmap.
md will not read/write any bitmap metadata.  Config must be
done via sysfs and when a recovery makes the array non-degraded
again, writing 'true' to 'bitmap/can_clear' will allow bits in
the bitmap to be cleared again.

Signed-off-by: NeilBrown <neilb@suse.de>
parent 624ce4f5
Loading
Loading
Loading
Loading
+16 −0
Original line number Diff line number Diff line
@@ -322,6 +322,22 @@ All md devices contain:
     'backlog' sets a limit on the number of concurrent background
     writes.  If there are more than this, new writes will be
     synchronous.
  bitmap/metadata
     This can be either 'internal' or 'external'.
     'internal' is the default and means the metadata for the bitmap
     is stored in the first 256 bytes of the allocated space and is
     managed by the md module.
     'external' means that bitmap metadata is managed externally to
     the kernel (i.e. by some userspace program).
  bitmap/can_clear
     This is either 'true' or 'false'.  If 'true', then bits in the
     bitmap will be cleared when the corresponding blocks are thought
     to be in-sync.  If 'false', bits will never be cleared.
     This is automatically set to 'false' if a write happens on a
     degraded array, or if the array becomes degraded during a write.
     When metadata is managed externally, it should be set to true
     once the array becomes non-degraded, and this fact has been
     recorded in the metadata.
     
     
     
+119 −23
Original line number Diff line number Diff line
@@ -497,6 +497,8 @@ void bitmap_update_sb(struct bitmap *bitmap)

	if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
		return;
	if (bitmap->mddev->bitmap_info.external)
		return;
	spin_lock_irqsave(&bitmap->lock, flags);
	if (!bitmap->sb_page) { /* no superblock */
		spin_unlock_irqrestore(&bitmap->lock, flags);
@@ -676,16 +678,26 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
 * general bitmap file operations
 */

/*
 * on-disk bitmap:
 *
 * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
 * file a page at a time. There's a superblock at the start of the file.
 */
/* calculate the index of the page that contains this bit */
static inline unsigned long file_page_index(unsigned long chunk)
static inline unsigned long file_page_index(struct bitmap *bitmap, unsigned long chunk)
{
	return CHUNK_BIT_OFFSET(chunk) >> PAGE_BIT_SHIFT;
	if (!bitmap->mddev->bitmap_info.external)
		chunk += sizeof(bitmap_super_t) << 3;
	return chunk >> PAGE_BIT_SHIFT;
}

/* calculate the (bit) offset of this bit within a page */
static inline unsigned long file_page_offset(unsigned long chunk)
static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned long chunk)
{
	return CHUNK_BIT_OFFSET(chunk) & (PAGE_BITS - 1);
	if (!bitmap->mddev->bitmap_info.external)
		chunk += sizeof(bitmap_super_t) << 3;
	return chunk & (PAGE_BITS - 1);
}

/*
@@ -698,8 +710,9 @@ static inline unsigned long file_page_offset(unsigned long chunk)
static inline struct page *filemap_get_page(struct bitmap *bitmap,
					unsigned long chunk)
{
	if (file_page_index(chunk) >= bitmap->file_pages) return NULL;
	return bitmap->filemap[file_page_index(chunk) - file_page_index(0)];
	if (file_page_index(bitmap, chunk) >= bitmap->file_pages) return NULL;
	return bitmap->filemap[file_page_index(bitmap, chunk)
			       - file_page_index(bitmap, 0)];
}


@@ -722,7 +735,7 @@ static void bitmap_file_unmap(struct bitmap *bitmap)
	spin_unlock_irqrestore(&bitmap->lock, flags);

	while (pages--)
		if (map[pages]->index != 0) /* 0 is sb_page, release it below */
		if (map[pages] != sb_page) /* 0 is sb_page, release it below */
			free_buffers(map[pages]);
	kfree(map);
	kfree(attr);
@@ -833,7 +846,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)

	page = filemap_get_page(bitmap, chunk);
	if (!page) return;
	bit = file_page_offset(chunk);
	bit = file_page_offset(bitmap, chunk);

 	/* set the bit */
	kaddr = kmap_atomic(page, KM_USER0);
@@ -931,14 +944,17 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
			"recovery\n", bmname(bitmap));

	bytes = (chunks + 7) / 8;
	if (!bitmap->mddev->bitmap_info.external)
		bytes += sizeof(bitmap_super_t);

	
	num_pages = (bytes + sizeof(bitmap_super_t) + PAGE_SIZE - 1) / PAGE_SIZE;
	num_pages = (bytes + PAGE_SIZE - 1) / PAGE_SIZE;

	if (file && i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) {
	if (file && i_size_read(file->f_mapping->host) < bytes) {
		printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
			bmname(bitmap),
			(unsigned long) i_size_read(file->f_mapping->host),
			bytes + sizeof(bitmap_super_t));
			bytes);
		goto err;
	}

@@ -959,17 +975,16 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)

	for (i = 0; i < chunks; i++) {
		int b;
		index = file_page_index(i);
		bit = file_page_offset(i);
		index = file_page_index(bitmap, i);
		bit = file_page_offset(bitmap, i);
		if (index != oldindex) { /* this is a new page, read it in */
			int count;
			/* unmap the old page, we're done with it */
			if (index == num_pages-1)
				count = bytes + sizeof(bitmap_super_t)
					- index * PAGE_SIZE;
				count = bytes - index * PAGE_SIZE;
			else
				count = PAGE_SIZE;
			if (index == 0) {
			if (index == 0 && bitmap->sb_page) {
				/*
				 * if we're here then the superblock page
				 * contains some bits (PAGE_SIZE != sizeof sb)
@@ -1164,7 +1179,8 @@ void bitmap_daemon_work(mddev_t *mddev)
			/* We are possibly going to clear some bits, so make
			 * sure that events_cleared is up-to-date.
			 */
			if (bitmap->need_sync) {
			if (bitmap->need_sync &&
			    bitmap->mddev->bitmap_info.external == 0) {
				bitmap_super_t *sb;
				bitmap->need_sync = 0;
				sb = kmap_atomic(bitmap->sb_page, KM_USER0);
@@ -1174,6 +1190,7 @@ void bitmap_daemon_work(mddev_t *mddev)
				write_page(bitmap, bitmap->sb_page, 1);
			}
			spin_lock_irqsave(&bitmap->lock, flags);
			if (!bitmap->need_sync)
				clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
		}
		bmc = bitmap_get_counter(bitmap,
@@ -1189,7 +1206,7 @@ void bitmap_daemon_work(mddev_t *mddev)
			if (*bmc == 2) {
				*bmc=1; /* maybe clear the bit next time */
				set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
			} else if (*bmc == 1) {
			} else if (*bmc == 1 && !bitmap->need_sync) {
				/* we can clear the bit */
				*bmc = 0;
				bitmap_count_page(bitmap,
@@ -1199,9 +1216,11 @@ void bitmap_daemon_work(mddev_t *mddev)
				/* clear the bit */
				paddr = kmap_atomic(page, KM_USER0);
				if (bitmap->flags & BITMAP_HOSTENDIAN)
					clear_bit(file_page_offset(j), paddr);
					clear_bit(file_page_offset(bitmap, j),
						  paddr);
				else
					ext2_clear_bit(file_page_offset(j), paddr);
					ext2_clear_bit(file_page_offset(bitmap, j),
						       paddr);
				kunmap_atomic(paddr, KM_USER0);
			}
		} else
@@ -1356,6 +1375,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
		    bitmap->events_cleared < bitmap->mddev->events) {
			bitmap->events_cleared = bitmap->mddev->events;
			bitmap->need_sync = 1;
			sysfs_notify_dirent(bitmap->sysfs_can_clear);
		}

		if (!success && ! (*bmc & NEEDED_MASK))
@@ -1613,6 +1633,9 @@ void bitmap_destroy(mddev_t *mddev)
	if (mddev->thread)
		mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;

	if (bitmap->sysfs_can_clear)
		sysfs_put(bitmap->sysfs_can_clear);

	bitmap_free(bitmap);
}

@@ -1629,6 +1652,7 @@ int bitmap_create(mddev_t *mddev)
	struct file *file = mddev->bitmap_info.file;
	int err;
	sector_t start;
	struct sysfs_dirent *bm;

	BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);

@@ -1648,6 +1672,13 @@ int bitmap_create(mddev_t *mddev)

	bitmap->mddev = mddev;

	bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
	if (bm) {
		bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear");
		sysfs_put(bm);
	} else
		bitmap->sysfs_can_clear = NULL;

	bitmap->file = file;
	if (file) {
		get_file(file);
@@ -1658,7 +1689,16 @@ int bitmap_create(mddev_t *mddev)
		vfs_fsync(file, file->f_dentry, 1);
	}
	/* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
	if (!mddev->bitmap_info.external)
		err = bitmap_read_sb(bitmap);
	else {
		err = 0;
		if (mddev->bitmap_info.chunksize == 0 ||
		    mddev->bitmap_info.daemon_sleep == 0)
			/* chunksize and time_base need to be
			 * set first. */
			err = -EINVAL;
	}
	if (err)
		goto error;

@@ -1777,7 +1817,8 @@ location_store(mddev_t *mddev, const char *buf, size_t len)
				return rv;
			if (offset == 0)
				return -EINVAL;
			if (mddev->major_version == 0 &&
			if (mddev->bitmap_info.external == 0 &&
			    mddev->major_version == 0 &&
			    offset != mddev->bitmap_info.default_offset)
				return -EINVAL;
			mddev->bitmap_info.offset = offset;
@@ -1906,11 +1947,66 @@ chunksize_store(mddev_t *mddev, const char *buf, size_t len)
static struct md_sysfs_entry bitmap_chunksize =
__ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);

/*
 * sysfs 'show' handler for the bitmap metadata mode.
 *
 * Writes "external\n" into @page when mddev->bitmap_info.external is
 * non-zero and "internal\n" otherwise, returning sprintf()'s result.
 */
static ssize_t metadata_show(mddev_t *mddev, char *page)
{
	const char *mode = "internal";

	if (mddev->bitmap_info.external)
		mode = "external";

	return sprintf(page, "%s\n", mode);
}

/*
 * sysfs 'store' handler for the bitmap metadata mode.
 *
 * Accepts input beginning with "external" or "internal" and records the
 * choice in mddev->bitmap_info.external.
 *
 * Returns @len on success, -EBUSY if a bitmap, bitmap file or bitmap
 * offset is already set, and -EINVAL for any other input.
 */
static ssize_t metadata_store(mddev_t *mddev, const char *buf, size_t len)
{
	int external;

	/* Refuse to switch modes once any bitmap state has been set up. */
	if (mddev->bitmap)
		return -EBUSY;
	if (mddev->bitmap_info.file)
		return -EBUSY;
	if (mddev->bitmap_info.offset)
		return -EBUSY;

	if (!strncmp(buf, "external", 8))
		external = 1;
	else if (!strncmp(buf, "internal", 8))
		external = 0;
	else
		return -EINVAL;

	mddev->bitmap_info.external = external;
	return len;
}

/*
 * sysfs entry named "metadata": mode S_IRUGO|S_IWUSR, read through
 * metadata_show() and written through metadata_store().
 */
static struct md_sysfs_entry bitmap_metadata =
__ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);

/*
 * sysfs 'show' handler for can_clear.
 *
 * With no bitmap attached only a newline is written.  Otherwise the
 * value is the inverse of bitmap->need_sync: "false\n" while need_sync
 * is set, "true\n" once it is clear.  Returns sprintf()'s result.
 */
static ssize_t can_clear_show(mddev_t *mddev, char *page)
{
	if (!mddev->bitmap)
		return sprintf(page, "\n");

	if (mddev->bitmap->need_sync)
		return sprintf(page, "%s\n", "false");

	return sprintf(page, "%s\n", "true");
}

/*
 * sysfs 'store' handler for can_clear.
 *
 * Input beginning with "false" sets bitmap->need_sync; input beginning
 * with "true" clears it, but only while the array is not degraded.
 *
 * Returns @len on success, -ENOENT when there is no bitmap, -EBUSY when
 * "true" is written to a degraded array, and -EINVAL for other input.
 */
static ssize_t can_clear_store(mddev_t *mddev, const char *buf, size_t len)
{
	if (!mddev->bitmap)
		return -ENOENT;

	if (!strncmp(buf, "false", 5)) {
		mddev->bitmap->need_sync = 1;
		return len;
	}

	if (strncmp(buf, "true", 4))
		return -EINVAL;

	/* Clearing need_sync is refused while the array is degraded. */
	if (mddev->degraded)
		return -EBUSY;

	mddev->bitmap->need_sync = 0;
	return len;
}

/*
 * sysfs entry named "can_clear": mode S_IRUGO|S_IWUSR, read through
 * can_clear_show() and written through can_clear_store().
 */
static struct md_sysfs_entry bitmap_can_clear =
__ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);

/*
 * NULL-terminated list of the bitmap-related sysfs attributes, including
 * the 'metadata' and 'can_clear' entries defined above.
 */
static struct attribute *md_bitmap_attrs[] = {
	&bitmap_location.attr,
	&bitmap_timeout.attr,
	&bitmap_backlog.attr,
	&bitmap_chunksize.attr,
	&bitmap_metadata.attr,
	&bitmap_can_clear.attr,
	NULL
};
struct attribute_group md_bitmap_group = {
+1 −10
Original line number Diff line number Diff line
@@ -118,16 +118,6 @@ typedef __u16 bitmap_counter_t;
			(CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1)
#define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1)

/*
 * on-disk bitmap:
 *
 * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
 * file a page at a time. There's a superblock at the start of the file.
 */

/* map chunks (bits) to file pages - offset by the size of the superblock */
#define CHUNK_BIT_OFFSET(chunk) ((chunk) + (sizeof(bitmap_super_t) << 3))

#endif

/*
@@ -250,6 +240,7 @@ struct bitmap {
	wait_queue_head_t write_wait;
	wait_queue_head_t overflow_wait;

	struct sysfs_dirent *sysfs_can_clear;
};

/* the bitmap API */
+1 −0
Original line number Diff line number Diff line
@@ -296,6 +296,7 @@ struct mddev_s
		unsigned long		chunksize;
		unsigned long		daemon_sleep; /* how many seconds between updates? */
		unsigned long		max_write_behind; /* write-behind mode */
		int			external;
	} bitmap_info;

	struct list_head		all_mddevs;