Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c83f6bf9 authored by Vivek Goyal's avatar Vivek Goyal Committed by Jens Axboe
Browse files

block: add partition resize function to blkpg ioctl



Add a new operation code (BLKPG_RESIZE_PARTITION) to the BLKPG ioctl that
allows altering the size of an existing partition, even if it is currently
in use.

This patch converts hd_struct->nr_sects into sequence counter because
One might extend a partition while IO is happening to it and update of
nr_sects can be non-atomic on 32bit machines with 64bit sector_t. This
can lead to issues like reading inconsistent size of a partition. Sequence
counter have been used so that readers don't have to take bdev mutex lock
as we call sector_in_part() very frequently.

Now all the access to hd_struct->nr_sects should happen using sequence
counter read/update helper functions part_nr_sects_read/part_nr_sects_write.
There is one exception though, set_capacity()/get_capacity(). I think
theoritically race should exist there too but this patch does not
modify set_capacity()/get_capacity() due to sheer number of call sites
and I am afraid that change might break something. I have left that as a
TODO item. We can handle it later if need be. This patch does not introduce
any new races as such w.r.t set_capacity()/get_capacity().

v2: Add CONFIG_LBDAF test to UP preempt case as suggested by Phillip.

Signed-off-by: default avatarVivek Goyal <vgoyal@redhat.com>
Signed-off-by: default avatarPhillip Susi <psusi@ubuntu.com>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 4638a83e
Loading
Loading
Loading
Loading
+15 −5
Original line number Diff line number Diff line
@@ -154,7 +154,7 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
		part = rcu_dereference(ptbl->part[piter->idx]);
		if (!part)
			continue;
		if (!part->nr_sects &&
		if (!part_nr_sects_read(part) &&
		    !(piter->flags & DISK_PITER_INCL_EMPTY) &&
		    !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
		      piter->idx == 0))
@@ -191,7 +191,7 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit);
static inline int sector_in_part(struct hd_struct *part, sector_t sector)
{
	return part->start_sect <= sector &&
		sector < part->start_sect + part->nr_sects;
		sector < part->start_sect + part_nr_sects_read(part);
}

/**
@@ -769,8 +769,8 @@ void __init printk_all_partitions(void)

			printk("%s%s %10llu %s %s", is_part0 ? "" : "  ",
			       bdevt_str(part_devt(part), devt_buf),
			       (unsigned long long)part->nr_sects >> 1,
			       disk_name(disk, part->partno, name_buf),
			       (unsigned long long)part_nr_sects_read(part) >> 1
			       , disk_name(disk, part->partno, name_buf),
			       uuid_buf);
			if (is_part0) {
				if (disk->driverfs_dev != NULL &&
@@ -862,7 +862,7 @@ static int show_partition(struct seq_file *seqf, void *v)
	while ((part = disk_part_iter_next(&piter)))
		seq_printf(seqf, "%4d  %7d %10llu %s\n",
			   MAJOR(part_devt(part)), MINOR(part_devt(part)),
			   (unsigned long long)part->nr_sects >> 1,
			   (unsigned long long)part_nr_sects_read(part) >> 1,
			   disk_name(sgp, part->partno, buf));
	disk_part_iter_exit(&piter);

@@ -1268,6 +1268,16 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
		}
		disk->part_tbl->part[0] = &disk->part0;

		/*
		 * set_capacity() and get_capacity() currently don't use
		 * seqcounter to read/update the part0->nr_sects. Still init
		 * the counter as we can read the sectors in IO submission
		 * patch using seqence counters.
		 *
		 * TODO: Ideally set_capacity() and get_capacity() should be
		 * converted to make use of bd_mutex and sequence counters.
		 */
		seqcount_init(&disk->part0.nr_sects_seq);
		hd_ref_init(&disk->part0);

		disk->minors = minors;
+56 −3
Original line number Diff line number Diff line
@@ -13,7 +13,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
{
	struct block_device *bdevp;
	struct gendisk *disk;
	struct hd_struct *part;
	struct hd_struct *part, *lpart;
	struct blkpg_ioctl_arg a;
	struct blkpg_partition p;
	struct disk_part_iter piter;
@@ -91,6 +91,59 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
			mutex_unlock(&bdevp->bd_mutex);
			bdput(bdevp);

			return 0;
		case BLKPG_RESIZE_PARTITION:
			start = p.start >> 9;
			/* new length of partition in bytes */
			length = p.length >> 9;
			/* check for fit in a hd_struct */
			if (sizeof(sector_t) == sizeof(long) &&
			    sizeof(long long) > sizeof(long)) {
				long pstart = start, plength = length;
				if (pstart != start || plength != length
				    || pstart < 0 || plength < 0)
					return -EINVAL;
			}
			part = disk_get_part(disk, partno);
			if (!part)
				return -ENXIO;
			bdevp = bdget(part_devt(part));
			if (!bdevp) {
				disk_put_part(part);
				return -ENOMEM;
			}
			mutex_lock(&bdevp->bd_mutex);
			mutex_lock_nested(&bdev->bd_mutex, 1);
			if (start != part->start_sect) {
				mutex_unlock(&bdevp->bd_mutex);
				mutex_unlock(&bdev->bd_mutex);
				bdput(bdevp);
				disk_put_part(part);
				return -EINVAL;
			}
			/* overlap? */
			disk_part_iter_init(&piter, disk,
					    DISK_PITER_INCL_EMPTY);
			while ((lpart = disk_part_iter_next(&piter))) {
				if (lpart->partno != partno &&
				   !(start + length <= lpart->start_sect ||
				   start >= lpart->start_sect + lpart->nr_sects)
				   ) {
					disk_part_iter_exit(&piter);
					mutex_unlock(&bdevp->bd_mutex);
					mutex_unlock(&bdev->bd_mutex);
					bdput(bdevp);
					disk_put_part(part);
					return -EBUSY;
				}
			}
			disk_part_iter_exit(&piter);
			part_nr_sects_write(part, (sector_t)length);
			i_size_write(bdevp->bd_inode, p.length);
			mutex_unlock(&bdevp->bd_mutex);
			mutex_unlock(&bdev->bd_mutex);
			bdput(bdevp);
			disk_put_part(part);
			return 0;
		default:
			return -EINVAL;
+3 −1
Original line number Diff line number Diff line
@@ -84,7 +84,7 @@ ssize_t part_size_show(struct device *dev,
		       struct device_attribute *attr, char *buf)
{
	struct hd_struct *p = dev_to_part(dev);
	return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
	return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p));
}

static ssize_t part_ro_show(struct device *dev,
@@ -294,6 +294,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
		err = -ENOMEM;
		goto out_free;
	}

	seqcount_init(&p->nr_sects_seq);
	pdev = part_to_dev(p);

	p->start_sect = start;
+1 −0
Original line number Diff line number Diff line
@@ -40,6 +40,7 @@ struct blkpg_ioctl_arg {
/* The subfunctions (for the op field) */
#define BLKPG_ADD_PARTITION	1
#define BLKPG_DEL_PARTITION	2
#define BLKPG_RESIZE_PARTITION	3

/* Sizes of name fields. Unused at present. */
#define BLKPG_DEVNAMELTH	64
+57 −0
Original line number Diff line number Diff line
@@ -98,7 +98,13 @@ struct partition_meta_info {

struct hd_struct {
	sector_t start_sect;
	/*
	 * nr_sects is protected by sequence counter. One might extend a
	 * partition while IO is happening to it and update of nr_sects
	 * can be non-atomic on 32bit machines with 64bit sector_t.
	 */
	sector_t nr_sects;
	seqcount_t nr_sects_seq;
	sector_t alignment_offset;
	unsigned int discard_alignment;
	struct device __dev;
@@ -648,6 +654,57 @@ static inline void hd_struct_put(struct hd_struct *part)
		__delete_partition(part);
}

/*
 * Any access of part->nr_sects which is not protected by partition
 * bd_mutex or gendisk bdev bd_mutex, should be done using this
 * accessor function.
 *
 * Code written along the lines of i_size_read() and i_size_write().
 * CONFIG_PREEMPT case optimizes the case of UP kernel with preemption
 * on.
 */
static inline sector_t part_nr_sects_read(struct hd_struct *part)
{
#if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP)
	sector_t nr_sects;
	unsigned seq;
	do {
		seq = read_seqcount_begin(&part->nr_sects_seq);
		nr_sects = part->nr_sects;
	} while (read_seqcount_retry(&part->nr_sects_seq, seq));
	return nr_sects;
#elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT)
	sector_t nr_sects;

	preempt_disable();
	nr_sects = part->nr_sects;
	preempt_enable();
	return nr_sects;
#else
	return part->nr_sects;
#endif
}

/*
 * Should be called with mutex lock held (typically bd_mutex) of partition
 * to provide mutual exlusion among writers otherwise seqcount might be
 * left in wrong state leaving the readers spinning infinitely.
 */
static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
{
#if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP)
	write_seqcount_begin(&part->nr_sects_seq);
	part->nr_sects = size;
	write_seqcount_end(&part->nr_sects_seq);
#elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT)
	preempt_disable();
	part->nr_sects = size;
	preempt_enable();
#else
	part->nr_sects = size;
#endif
}

#else /* CONFIG_BLOCK */

static inline void printk_all_partitions(void) { }