Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 057d3ccf authored by Jens Axboe's avatar Jens Axboe
Browse files

Merge branch 'stable/for-jens-4.19' of...

Merge branch 'stable/for-jens-4.19' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen into for-linus

Pull Xen block driver fixes from Konrad:

"Fix for flushing out persistent pages at a deterministic rate"

* 'stable/for-jens-4.19' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
  xen/blkback: remove unused pers_gnts_lock from struct xen_blkif_ring
  xen/blkback: move persistent grants flags to bool
  xen/blkfront: reorder tests in xlblk_init()
  xen/blkfront: cleanup stale persistent grants
  xen/blkback: don't keep persistent grants too long
parents 38cfb5a4 6f2f39ad
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -15,3 +15,13 @@ Description:
                blkback. If the frontend tries to use more than
                max_persistent_grants, the LRU kicks in and starts
                removing 5% of max_persistent_grants every 100ms.

What:           /sys/module/xen_blkback/parameters/persistent_grant_unused_seconds
Date:           August 2018
KernelVersion:  4.19
Contact:        Roger Pau Monné <roger.pau@citrix.com>
Description:
                How long a persistent grant is allowed to remain
                allocated without being in use. The time is in
                seconds, 0 means indefinitely long.
                The default is 60 seconds.
+53 −46
Original line number Diff line number Diff line
@@ -83,6 +83,18 @@ module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644);
MODULE_PARM_DESC(max_persistent_grants,
                 "Maximum number of grants to map persistently");

/*
 * How long a persistent grant is allowed to remain allocated without being in
 * use. The time is in seconds, 0 means indefinitely long.
 */

static unsigned int xen_blkif_pgrant_timeout = 60;
module_param_named(persistent_grant_unused_seconds, xen_blkif_pgrant_timeout,
		   uint, 0644);
MODULE_PARM_DESC(persistent_grant_unused_seconds,
		 "Time in seconds an unused persistent grant is allowed to "
		 "remain allocated. Default is 60, 0 means unlimited.");

/*
 * Maximum number of rings/queues blkback supports, allow as many queues as there
 * are CPUs if user has not specified a value.
@@ -123,6 +135,13 @@ module_param(log_stats, int, 0644);
/* Number of free pages to remove on each call to gnttab_free_pages */
#define NUM_BATCH_FREE_PAGES 10

static inline bool persistent_gnt_timeout(struct persistent_gnt *persistent_gnt)
{
	return xen_blkif_pgrant_timeout &&
	       (jiffies - persistent_gnt->last_used >=
		HZ * xen_blkif_pgrant_timeout);
}

static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page)
{
	unsigned long flags;
@@ -236,8 +255,7 @@ static int add_persistent_gnt(struct xen_blkif_ring *ring,
		}
	}

	bitmap_zero(persistent_gnt->flags, PERSISTENT_GNT_FLAGS_SIZE);
	set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
	persistent_gnt->active = true;
	/* Add new node and rebalance tree. */
	rb_link_node(&(persistent_gnt->node), parent, new);
	rb_insert_color(&(persistent_gnt->node), &ring->persistent_gnts);
@@ -261,11 +279,11 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring,
		else if (gref > data->gnt)
			node = node->rb_right;
		else {
			if(test_bit(PERSISTENT_GNT_ACTIVE, data->flags)) {
			if (data->active) {
				pr_alert_ratelimited("requesting a grant already in use\n");
				return NULL;
			}
			set_bit(PERSISTENT_GNT_ACTIVE, data->flags);
			data->active = true;
			atomic_inc(&ring->persistent_gnt_in_use);
			return data;
		}
@@ -276,10 +294,10 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring,
static void put_persistent_gnt(struct xen_blkif_ring *ring,
                               struct persistent_gnt *persistent_gnt)
{
	if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
	if (!persistent_gnt->active)
		pr_alert_ratelimited("freeing a grant already unused\n");
	set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
	clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
	persistent_gnt->last_used = jiffies;
	persistent_gnt->active = false;
	atomic_dec(&ring->persistent_gnt_in_use);
}

@@ -371,26 +389,26 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring)
	struct persistent_gnt *persistent_gnt;
	struct rb_node *n;
	unsigned int num_clean, total;
	bool scan_used = false, clean_used = false;
	bool scan_used = false;
	struct rb_root *root;

	if (ring->persistent_gnt_c < xen_blkif_max_pgrants ||
	    (ring->persistent_gnt_c == xen_blkif_max_pgrants &&
	    !ring->blkif->vbd.overflow_max_grants)) {
		goto out;
	}

	if (work_busy(&ring->persistent_purge_work)) {
		pr_alert_ratelimited("Scheduled work from previous purge is still busy, cannot purge list\n");
		goto out;
	}

	if (ring->persistent_gnt_c < xen_blkif_max_pgrants ||
	    (ring->persistent_gnt_c == xen_blkif_max_pgrants &&
	    !ring->blkif->vbd.overflow_max_grants)) {
		num_clean = 0;
	} else {
		num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
	num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
		num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants +
			    num_clean;
		num_clean = min(ring->persistent_gnt_c, num_clean);
	if ((num_clean == 0) ||
	    (num_clean > (ring->persistent_gnt_c - atomic_read(&ring->persistent_gnt_in_use))))
		goto out;
		pr_debug("Going to purge at least %u persistent grants\n",
			 num_clean);
	}

	/*
	 * At this point, we can assure that there will be no calls
@@ -401,9 +419,7 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring)
         * number of grants.
	 */

	total = num_clean;

	pr_debug("Going to purge %u persistent grants\n", num_clean);
	total = 0;

	BUG_ON(!list_empty(&ring->persistent_purge_list));
	root = &ring->persistent_gnts;
@@ -412,46 +428,37 @@ static void purge_persistent_gnt(struct xen_blkif_ring *ring)
		BUG_ON(persistent_gnt->handle ==
			BLKBACK_INVALID_HANDLE);

		if (clean_used) {
			clear_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
		if (persistent_gnt->active)
			continue;
		}

		if (test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
		if (!scan_used && !persistent_gnt_timeout(persistent_gnt))
			continue;
		if (!scan_used &&
		    (test_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags)))
		if (scan_used && total >= num_clean)
			continue;

		rb_erase(&persistent_gnt->node, root);
		list_add(&persistent_gnt->remove_node,
			 &ring->persistent_purge_list);
		if (--num_clean == 0)
			goto finished;
		total++;
	}
	/*
	 * If we get here it means we also need to start cleaning
	 * Check whether we also need to start cleaning
	 * grants that were used since last purge in order to cope
	 * with the requested num
	 */
	if (!scan_used && !clean_used) {
		pr_debug("Still missing %u purged frames\n", num_clean);
	if (!scan_used && total < num_clean) {
		pr_debug("Still missing %u purged frames\n", num_clean - total);
		scan_used = true;
		goto purge_list;
	}
finished:
	if (!clean_used) {
		pr_debug("Finished scanning for grants to clean, removing used flag\n");
		clean_used = true;
		goto purge_list;
	}

	ring->persistent_gnt_c -= (total - num_clean);
	if (total) {
		ring->persistent_gnt_c -= total;
		ring->blkif->vbd.overflow_max_grants = 0;

		/* We can defer this work */
		schedule_work(&ring->persistent_purge_work);
	pr_debug("Purged %u/%u\n", (total - num_clean), total);
		pr_debug("Purged %u/%u\n", num_clean, total);
	}

out:
	return;
+2 −12
Original line number Diff line number Diff line
@@ -233,16 +233,6 @@ struct xen_vbd {

struct backend_info;

/* Number of available flags */
#define PERSISTENT_GNT_FLAGS_SIZE	2
/* This persistent grant is currently in use */
#define PERSISTENT_GNT_ACTIVE		0
/*
 * This persistent grant has been used, this flag is set when we remove the
 * PERSISTENT_GNT_ACTIVE, to know that this grant has been used recently.
 */
#define PERSISTENT_GNT_WAS_ACTIVE	1

/* Number of requests that we can fit in a ring */
#define XEN_BLKIF_REQS_PER_PAGE		32

@@ -250,7 +240,8 @@ struct persistent_gnt {
	struct page *page;
	grant_ref_t gnt;
	grant_handle_t handle;
	DECLARE_BITMAP(flags, PERSISTENT_GNT_FLAGS_SIZE);
	unsigned long last_used;
	bool active;
	struct rb_node node;
	struct list_head remove_node;
};
@@ -278,7 +269,6 @@ struct xen_blkif_ring {
	wait_queue_head_t	pending_free_wq;

	/* Tree to store persistent grants. */
	spinlock_t		pers_gnts_lock;
	struct rb_root		persistent_gnts;
	unsigned int		persistent_gnt_c;
	atomic_t		persistent_gnt_in_use;
+98 −12
Original line number Diff line number Diff line
@@ -46,6 +46,7 @@
#include <linux/scatterlist.h>
#include <linux/bitmap.h>
#include <linux/list.h>
#include <linux/workqueue.h>

#include <xen/xen.h>
#include <xen/xenbus.h>
@@ -121,6 +122,8 @@ static inline struct blkif_req *blkif_req(struct request *rq)

static DEFINE_MUTEX(blkfront_mutex);
static const struct block_device_operations xlvbd_block_fops;
static struct delayed_work blkfront_work;
static LIST_HEAD(info_list);

/*
 * Maximum number of segments in indirect requests, the actual value used by
@@ -216,6 +219,7 @@ struct blkfront_info
	/* Save incomplete reqs and bios for migration. */
	struct list_head requests;
	struct bio_list bio_list;
	struct list_head info_list;
};

static unsigned int nr_minors;
@@ -1759,6 +1763,12 @@ static int write_per_ring_nodes(struct xenbus_transaction xbt,
	return err;
}

/*
 * Unlink @info from the list it is chained on through ->info_list and
 * release its memory. The callers visible in this file take blkfront_mutex
 * around the call; @info must not be touched afterwards.
 */
static void free_info(struct blkfront_info *info)
{
	struct list_head *entry = &info->info_list;

	list_del(entry);
	kfree(info);
}

/* Common code used when first setting up, and when resuming. */
static int talk_to_blkback(struct xenbus_device *dev,
			   struct blkfront_info *info)
@@ -1880,7 +1890,10 @@ static int talk_to_blkback(struct xenbus_device *dev,
 destroy_blkring:
	blkif_free(info, 0);

	kfree(info);
	mutex_lock(&blkfront_mutex);
	free_info(info);
	mutex_unlock(&blkfront_mutex);

	dev_set_drvdata(&dev->dev, NULL);

	return err;
@@ -1991,6 +2004,10 @@ static int blkfront_probe(struct xenbus_device *dev,
	info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
	dev_set_drvdata(&dev->dev, info);

	mutex_lock(&blkfront_mutex);
	list_add(&info->info_list, &info_list);
	mutex_unlock(&blkfront_mutex);

	return 0;
}

@@ -2301,6 +2318,12 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
	if (indirect_segments <= BLKIF_MAX_SEGMENTS_PER_REQUEST)
		indirect_segments = 0;
	info->max_indirect_segments = indirect_segments;

	if (info->feature_persistent) {
		mutex_lock(&blkfront_mutex);
		schedule_delayed_work(&blkfront_work, HZ * 10);
		mutex_unlock(&blkfront_mutex);
	}
}

/*
@@ -2482,7 +2505,9 @@ static int blkfront_remove(struct xenbus_device *xbdev)
	mutex_unlock(&info->mutex);

	if (!bdev) {
		kfree(info);
		mutex_lock(&blkfront_mutex);
		free_info(info);
		mutex_unlock(&blkfront_mutex);
		return 0;
	}

@@ -2502,7 +2527,9 @@ static int blkfront_remove(struct xenbus_device *xbdev)
	if (info && !bdev->bd_openers) {
		xlvbd_release_gendisk(info);
		disk->private_data = NULL;
		kfree(info);
		mutex_lock(&blkfront_mutex);
		free_info(info);
		mutex_unlock(&blkfront_mutex);
	}

	mutex_unlock(&bdev->bd_mutex);
@@ -2585,7 +2612,7 @@ static void blkif_release(struct gendisk *disk, fmode_t mode)
		dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
		xlvbd_release_gendisk(info);
		disk->private_data = NULL;
		kfree(info);
		free_info(info);
	}

out:
@@ -2618,6 +2645,61 @@ static struct xenbus_driver blkfront_driver = {
	.is_ready = blkfront_is_ready,
};

static void purge_persistent_grants(struct blkfront_info *info)
{
	unsigned int i;
	unsigned long flags;

	for (i = 0; i < info->nr_rings; i++) {
		struct blkfront_ring_info *rinfo = &info->rinfo[i];
		struct grant *gnt_list_entry, *tmp;

		spin_lock_irqsave(&rinfo->ring_lock, flags);

		if (rinfo->persistent_gnts_c == 0) {
			spin_unlock_irqrestore(&rinfo->ring_lock, flags);
			continue;
		}

		list_for_each_entry_safe(gnt_list_entry, tmp, &rinfo->grants,
					 node) {
			if (gnt_list_entry->gref == GRANT_INVALID_REF ||
			    gnttab_query_foreign_access(gnt_list_entry->gref))
				continue;

			list_del(&gnt_list_entry->node);
			gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL);
			rinfo->persistent_gnts_c--;
			__free_page(gnt_list_entry->page);
			kfree(gnt_list_entry);
		}

		spin_unlock_irqrestore(&rinfo->ring_lock, flags);
	}
}

/*
 * Delayed-work handler: for every registered frontend that has
 * feature_persistent set, purge its persistent grants under that device's
 * mutex. If at least one such device was seen, queue blkfront_work again
 * with a delay of HZ * 10. The whole pass runs with blkfront_mutex held.
 */
static void blkfront_delay_work(struct work_struct *work)
{
	struct blkfront_info *cur;
	bool rearm = false;

	mutex_lock(&blkfront_mutex);

	list_for_each_entry(cur, &info_list, info_list) {
		if (!cur->feature_persistent)
			continue;

		rearm = true;

		mutex_lock(&cur->mutex);
		purge_persistent_grants(cur);
		mutex_unlock(&cur->mutex);
	}

	if (rearm)
		schedule_delayed_work(&blkfront_work, HZ * 10);

	mutex_unlock(&blkfront_mutex);
}

static int __init xlblk_init(void)
{
	int ret;
@@ -2626,6 +2708,15 @@ static int __init xlblk_init(void)
	if (!xen_domain())
		return -ENODEV;

	if (!xen_has_pv_disk_devices())
		return -ENODEV;

	if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
		pr_warn("xen_blk: can't get major %d with name %s\n",
			XENVBD_MAJOR, DEV_NAME);
		return -ENODEV;
	}

	if (xen_blkif_max_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST)
		xen_blkif_max_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST;

@@ -2641,14 +2732,7 @@ static int __init xlblk_init(void)
		xen_blkif_max_queues = nr_cpus;
	}

	if (!xen_has_pv_disk_devices())
		return -ENODEV;

	if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
		printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
		       XENVBD_MAJOR, DEV_NAME);
		return -ENODEV;
	}
	INIT_DELAYED_WORK(&blkfront_work, blkfront_delay_work);

	ret = xenbus_register_frontend(&blkfront_driver);
	if (ret) {
@@ -2663,6 +2747,8 @@ module_init(xlblk_init);

static void __exit xlblk_exit(void)
{
	cancel_delayed_work_sync(&blkfront_work);

	xenbus_unregister_driver(&blkfront_driver);
	unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
	kfree(minors);