Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7bf7eac8 authored by Dan Williams
Browse files

dax: Arrange for dax_supported check to span multiple devices



Pankaj reports that starting with commit ad428cdb "dax: Check the
end of the block-device capacity with dax_direct_access()" device-mapper
no longer allows dax operation. This results from the stricter checks in
__bdev_dax_supported() that validate that the start and end of a
block-device map to the same 'pagemap' instance.

Teach the dax-core and device-mapper to validate the 'pagemap' on a
per-target basis. This is accomplished by refactoring the
bdev_dax_supported() internals into generic_fsdax_supported() which
takes a sector range to validate. Consequently generic_fsdax_supported()
is suitable to be used in a device-mapper ->iterate_devices() callback.
A new ->dax_supported() operation is added to allow composite devices to
split and route upper-level bdev_dax_supported() requests.

Fixes: ad428cdb ("dax: Check the end of the block-device...")
Cc: <stable@vger.kernel.org>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reported-by: Pankaj Gupta <pagupta@redhat.com>
Reviewed-by: Pankaj Gupta <pagupta@redhat.com>
Tested-by: Pankaj Gupta <pagupta@redhat.com>
Tested-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
parent c01dafad
Loading
Loading
Loading
Loading
+57 −31
Original line number Original line Diff line number Diff line
@@ -73,22 +73,12 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
#endif
#endif


/**
bool __generic_fsdax_supported(struct dax_device *dax_dev,
 * __bdev_dax_supported() - Check if the device supports dax for filesystem
		struct block_device *bdev, int blocksize, sector_t start,
 * @bdev: block device to check
		sector_t sectors)
 * @blocksize: The block size of the device
 *
 * This is a library function for filesystems to check if the block device
 * can be mounted with dax option.
 *
 * Return: true if supported, false if unsupported
 */
bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
{
{
	struct dax_device *dax_dev;
	bool dax_enabled = false;
	bool dax_enabled = false;
	pgoff_t pgoff, pgoff_end;
	pgoff_t pgoff, pgoff_end;
	struct request_queue *q;
	char buf[BDEVNAME_SIZE];
	char buf[BDEVNAME_SIZE];
	void *kaddr, *end_kaddr;
	void *kaddr, *end_kaddr;
	pfn_t pfn, end_pfn;
	pfn_t pfn, end_pfn;
@@ -102,21 +92,14 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
		return false;
		return false;
	}
	}


	q = bdev_get_queue(bdev);
	err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff);
	if (!q || !blk_queue_dax(q)) {
		pr_debug("%s: error: request queue doesn't support dax\n",
				bdevname(bdev, buf));
		return false;
	}

	err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff);
	if (err) {
	if (err) {
		pr_debug("%s: error: unaligned partition for dax\n",
		pr_debug("%s: error: unaligned partition for dax\n",
				bdevname(bdev, buf));
				bdevname(bdev, buf));
		return false;
		return false;
	}
	}


	last_page = PFN_DOWN(i_size_read(bdev->bd_inode) - 1) * 8;
	last_page = PFN_DOWN((start + sectors - 1) * 512) * PAGE_SIZE / 512;
	err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end);
	err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end);
	if (err) {
	if (err) {
		pr_debug("%s: error: unaligned partition for dax\n",
		pr_debug("%s: error: unaligned partition for dax\n",
@@ -124,20 +107,11 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
		return false;
		return false;
	}
	}


	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
	if (!dax_dev) {
		pr_debug("%s: error: device does not support dax\n",
				bdevname(bdev, buf));
		return false;
	}

	id = dax_read_lock();
	id = dax_read_lock();
	len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
	len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
	len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn);
	len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn);
	dax_read_unlock(id);
	dax_read_unlock(id);


	put_dax(dax_dev);

	if (len < 1 || len2 < 1) {
	if (len < 1 || len2 < 1) {
		pr_debug("%s: error: dax access failed (%ld)\n",
		pr_debug("%s: error: dax access failed (%ld)\n",
				bdevname(bdev, buf), len < 1 ? len : len2);
				bdevname(bdev, buf), len < 1 ? len : len2);
@@ -178,6 +152,49 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
	}
	}
	return true;
	return true;
}
}
EXPORT_SYMBOL_GPL(__generic_fsdax_supported);

/**
 * __bdev_dax_supported() - Check if the device supports dax for filesystem
 * @bdev: block device to check
 * @blocksize: The block size of the device
 *
 * This is a library function for filesystems to check if the block device
 * can be mounted with dax option.
 *
 * Return: true if supported, false if unsupported
 */
bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
{
	struct dax_device *dax_dev;
	struct request_queue *q;
	char buf[BDEVNAME_SIZE];
	bool ret;
	int id;

	q = bdev_get_queue(bdev);
	if (!q || !blk_queue_dax(q)) {
		pr_debug("%s: error: request queue doesn't support dax\n",
				bdevname(bdev, buf));
		return false;
	}

	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
	if (!dax_dev) {
		pr_debug("%s: error: device does not support dax\n",
				bdevname(bdev, buf));
		return false;
	}

	id = dax_read_lock();
	ret = dax_supported(dax_dev, bdev, blocksize, 0,
			i_size_read(bdev->bd_inode) / 512);
	dax_read_unlock(id);

	put_dax(dax_dev);

	return ret;
}
EXPORT_SYMBOL_GPL(__bdev_dax_supported);
EXPORT_SYMBOL_GPL(__bdev_dax_supported);
#endif
#endif


@@ -303,6 +320,15 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
}
}
EXPORT_SYMBOL_GPL(dax_direct_access);
EXPORT_SYMBOL_GPL(dax_direct_access);


bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
		int blocksize, sector_t start, sector_t len)
{
	if (!dax_alive(dax_dev))
		return false;

	return dax_dev->ops->dax_supported(dax_dev, bdev, blocksize, start, len);
}

size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
		size_t bytes, struct iov_iter *i)
		size_t bytes, struct iov_iter *i)
{
{
+11 −6
Original line number Original line Diff line number Diff line
@@ -880,13 +880,17 @@ void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type)
}
}
EXPORT_SYMBOL_GPL(dm_table_set_type);
EXPORT_SYMBOL_GPL(dm_table_set_type);


/* validate the dax capability of the target device span */
static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
				       sector_t start, sector_t len, void *data)
				       sector_t start, sector_t len, void *data)
{
{
	return bdev_dax_supported(dev->bdev, PAGE_SIZE);
	int blocksize = *(int *) data;

	return generic_fsdax_supported(dev->dax_dev, dev->bdev, blocksize,
			start, len);
}
}


static bool dm_table_supports_dax(struct dm_table *t)
bool dm_table_supports_dax(struct dm_table *t, int blocksize)
{
{
	struct dm_target *ti;
	struct dm_target *ti;
	unsigned i;
	unsigned i;
@@ -899,7 +903,8 @@ static bool dm_table_supports_dax(struct dm_table *t)
			return false;
			return false;


		if (!ti->type->iterate_devices ||
		if (!ti->type->iterate_devices ||
		    !ti->type->iterate_devices(ti, device_supports_dax, NULL))
		    !ti->type->iterate_devices(ti, device_supports_dax,
			    &blocksize))
			return false;
			return false;
	}
	}


@@ -979,7 +984,7 @@ static int dm_table_determine_type(struct dm_table *t)
verify_bio_based:
verify_bio_based:
		/* We must use this table as bio-based */
		/* We must use this table as bio-based */
		t->type = DM_TYPE_BIO_BASED;
		t->type = DM_TYPE_BIO_BASED;
		if (dm_table_supports_dax(t) ||
		if (dm_table_supports_dax(t, PAGE_SIZE) ||
		    (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) {
		    (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) {
			t->type = DM_TYPE_DAX_BIO_BASED;
			t->type = DM_TYPE_DAX_BIO_BASED;
		} else {
		} else {
@@ -1905,7 +1910,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
	}
	}
	blk_queue_write_cache(q, wc, fua);
	blk_queue_write_cache(q, wc, fua);


	if (dm_table_supports_dax(t))
	if (dm_table_supports_dax(t, PAGE_SIZE))
		blk_queue_flag_set(QUEUE_FLAG_DAX, q);
		blk_queue_flag_set(QUEUE_FLAG_DAX, q);
	else
	else
		blk_queue_flag_clear(QUEUE_FLAG_DAX, q);
		blk_queue_flag_clear(QUEUE_FLAG_DAX, q);
+20 −0
Original line number Original line Diff line number Diff line
@@ -1107,6 +1107,25 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
	return ret;
	return ret;
}
}


static bool dm_dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
		int blocksize, sector_t start, sector_t len)
{
	struct mapped_device *md = dax_get_private(dax_dev);
	struct dm_table *map;
	int srcu_idx;
	bool ret;

	map = dm_get_live_table(md, &srcu_idx);
	if (!map)
		return false;

	ret = dm_table_supports_dax(map, blocksize);

	dm_put_live_table(md, srcu_idx);

	return ret;
}

static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
				    void *addr, size_t bytes, struct iov_iter *i)
				    void *addr, size_t bytes, struct iov_iter *i)
{
{
@@ -3192,6 +3211,7 @@ static const struct block_device_operations dm_blk_dops = {


static const struct dax_operations dm_dax_ops = {
static const struct dax_operations dm_dax_ops = {
	.direct_access = dm_dax_direct_access,
	.direct_access = dm_dax_direct_access,
	.dax_supported = dm_dax_supported,
	.copy_from_iter = dm_dax_copy_from_iter,
	.copy_from_iter = dm_dax_copy_from_iter,
	.copy_to_iter = dm_dax_copy_to_iter,
	.copy_to_iter = dm_dax_copy_to_iter,
};
};
+1 −0
Original line number Original line Diff line number Diff line
@@ -72,6 +72,7 @@ bool dm_table_bio_based(struct dm_table *t);
bool dm_table_request_based(struct dm_table *t);
bool dm_table_request_based(struct dm_table *t);
void dm_table_free_md_mempools(struct dm_table *t);
void dm_table_free_md_mempools(struct dm_table *t);
struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
bool dm_table_supports_dax(struct dm_table *t, int blocksize);


void dm_lock_md_type(struct mapped_device *md);
void dm_lock_md_type(struct mapped_device *md);
void dm_unlock_md_type(struct mapped_device *md);
void dm_unlock_md_type(struct mapped_device *md);
+1 −0
Original line number Original line Diff line number Diff line
@@ -295,6 +295,7 @@ static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,


static const struct dax_operations pmem_dax_ops = {
static const struct dax_operations pmem_dax_ops = {
	.direct_access = pmem_dax_direct_access,
	.direct_access = pmem_dax_direct_access,
	.dax_supported = generic_fsdax_supported,
	.copy_from_iter = pmem_copy_from_iter,
	.copy_from_iter = pmem_copy_from_iter,
	.copy_to_iter = pmem_copy_to_iter,
	.copy_to_iter = pmem_copy_to_iter,
};
};
Loading