Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8fa72d23, authored by Jeff Moyer, committed by Jens Axboe
Browse files

bdi: add a user-tunable cpu_list for the bdi flusher threads



In realtime environments, it may be desirable to keep the per-bdi
flusher threads from running on certain cpus.  This patch adds a
cpu_list file to /sys/class/bdi/* to enable this.  The default is to tie
the flusher threads to the same numa node as the backing device (though
I could be convinced to make it a mask of all cpus to avoid a change in
behaviour).

Thanks to Jeremy Eder for the original idea.

Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent c304a51b
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@
#include <linux/writeback.h>
#include <linux/atomic.h>
#include <linux/sysctl.h>
#include <linux/mutex.h>

struct page;
struct device;
@@ -105,6 +106,9 @@ struct backing_dev_info {

	struct timer_list laptop_mode_wb_timer;

	cpumask_t *flusher_cpumask; /* used for writeback thread scheduling */
	struct mutex flusher_cpumask_lock;

#ifdef CONFIG_DEBUG_FS
	struct dentry *debug_dir;
	struct dentry *debug_stats;
+84 −0
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/writeback.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <trace/events/writeback.h>

static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
@@ -221,12 +222,63 @@ static ssize_t max_ratio_store(struct device *dev,
}
BDI_SHOW(max_ratio, bdi->max_ratio)

/*
 * cpu_list_store - sysfs write handler for the bdi "cpu_list" attribute.
 * @dev:   bdi device; its drvdata is the struct backing_dev_info
 * @attr:  attribute being written (unused)
 * @buf:   user-supplied cpu list, parsed by cpulist_parse()
 * @count: number of bytes in @buf
 *
 * Parses @buf into a cpumask, applies it to the currently running flusher
 * thread (if there is one) and records it in bdi->flusher_cpumask so that
 * flusher threads created later are bound to the same cpus.
 *
 * Returns @count on success, -ENOMEM if the temporary mask cannot be
 * allocated, -EINVAL if this bdi has no flusher cpumask, or the error
 * returned by cpulist_parse() / set_cpus_allowed_ptr().
 */
static ssize_t cpu_list_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);
	struct bdi_writeback *wb = &bdi->wb;
	cpumask_var_t newmask;
	ssize_t ret;
	struct task_struct *task;

	/*
	 * bdi_init() leaves flusher_cpumask NULL and does not initialise
	 * flusher_cpumask_lock for a bdi that has the flush-forker
	 * capability.  Bail out before touching either of them.
	 */
	if (!bdi->flusher_cpumask)
		return -EINVAL;

	if (!alloc_cpumask_var(&newmask, GFP_KERNEL))
		return -ENOMEM;

	ret = cpulist_parse(buf, newmask);
	if (ret)
		goto out_free;

	/*
	 * Look up the flusher task while holding flusher_cpumask_lock.
	 * The forker thread publishes wb->task first and only then takes
	 * this mutex to bind the new thread, so sampling the task inside
	 * the critical section guarantees that a thread we do not see here
	 * will be bound by the forker using the mask we store below.
	 */
	mutex_lock(&bdi->flusher_cpumask_lock);

	/* Pin the task so it cannot be freed once wb_lock is dropped. */
	spin_lock_bh(&bdi->wb_lock);
	task = wb->task;
	if (task)
		get_task_struct(task);
	spin_unlock_bh(&bdi->wb_lock);

	if (task) {
		ret = set_cpus_allowed_ptr(task, newmask);
		put_task_struct(task);
	}
	/* Only remember the mask if it was accepted (or no thread exists). */
	if (ret == 0) {
		cpumask_copy(bdi->flusher_cpumask, newmask);
		ret = count;
	}
	mutex_unlock(&bdi->flusher_cpumask_lock);

out_free:
	free_cpumask_var(newmask);

	return ret;
}

static ssize_t cpu_list_show(struct device *dev,
		struct device_attribute *attr, char *page)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);
	ssize_t ret;

	mutex_lock(&bdi->flusher_cpumask_lock);
	ret = cpulist_scnprintf(page, PAGE_SIZE-1, bdi->flusher_cpumask);
	mutex_unlock(&bdi->flusher_cpumask_lock);

	return ret;
}

/*
 * Local shorthand: declare a mode-0644 device attribute named <attr> whose
 * read and write handlers are <attr>_show and <attr>_store respectively.
 */
#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)

/*
 * Attribute table for bdi devices, terminated by __ATTR_NULL.  Each entry
 * pairs a file name with the matching *_show / *_store handlers defined
 * in this file.
 * NOTE(review): presumably installed as the bdi class's default device
 * attributes (files under /sys/class/bdi/<name>/) -- the registration is
 * not visible here, confirm against the class setup code.
 */
static struct device_attribute bdi_dev_attrs[] = {
	__ATTR_RW(read_ahead_kb),
	__ATTR_RW(min_ratio),
	__ATTR_RW(max_ratio),
	__ATTR_RW(cpu_list),	/* cpus the flusher thread may run on */
	__ATTR_NULL,
};

@@ -428,6 +480,7 @@ static int bdi_forker_thread(void *ptr)
				writeback_inodes_wb(&bdi->wb, 1024,
						    WB_REASON_FORKER_THREAD);
			} else {
				int ret;
				/*
				 * The spinlock makes sure we do not lose
				 * wake-ups when racing with 'bdi_queue_work()'.
@@ -437,6 +490,14 @@ static int bdi_forker_thread(void *ptr)
				spin_lock_bh(&bdi->wb_lock);
				bdi->wb.task = task;
				spin_unlock_bh(&bdi->wb_lock);
				mutex_lock(&bdi->flusher_cpumask_lock);
				ret = set_cpus_allowed_ptr(task,
							bdi->flusher_cpumask);
				mutex_unlock(&bdi->flusher_cpumask_lock);
				if (ret)
					printk_once("%s: failed to bind flusher"
						    " thread %s, error %d\n",
						    __func__, task->comm, ret);
				wake_up_process(task);
			}
			bdi_clear_pending(bdi);
@@ -509,6 +570,17 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
						dev_name(dev));
		if (IS_ERR(wb->task))
			return PTR_ERR(wb->task);
	} else {
		int node;
		/*
		 * Set up a default cpumask for the flusher threads that
		 * includes all cpus on the same numa node as the device.
		 * The mask may be overridden via sysfs.
		 */
		node = dev_to_node(bdi->dev);
		if (node != NUMA_NO_NODE)
			cpumask_copy(bdi->flusher_cpumask,
				     cpumask_of_node(node));
	}

	bdi_debug_register(bdi, dev_name(dev));
@@ -634,6 +706,15 @@ int bdi_init(struct backing_dev_info *bdi)

	bdi_wb_init(&bdi->wb, bdi);

	if (!bdi_cap_flush_forker(bdi)) {
		bdi->flusher_cpumask = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
		if (!bdi->flusher_cpumask)
			return -ENOMEM;
		cpumask_setall(bdi->flusher_cpumask);
		mutex_init(&bdi->flusher_cpumask_lock);
	} else
		bdi->flusher_cpumask = NULL;

	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
		if (err)
@@ -656,6 +737,7 @@ int bdi_init(struct backing_dev_info *bdi)
err:
		while (i--)
			percpu_counter_destroy(&bdi->bdi_stat[i]);
		kfree(bdi->flusher_cpumask);
	}

	return err;
@@ -683,6 +765,8 @@ void bdi_destroy(struct backing_dev_info *bdi)

	bdi_unregister(bdi);

	kfree(bdi->flusher_cpumask);

	/*
	 * If bdi_unregister() had already been called earlier, the
	 * wakeup_timer could still be armed because bdi_prune_sb()