Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ef4d43a8 authored by Hugh Dickins's avatar Hugh Dickins Committed by Linus Torvalds
Browse files

ksm: stop hotremove lockdep warning



Complaints are rare, but lockdep still does not understand the way
ksm_memory_callback(MEM_GOING_OFFLINE) takes ksm_thread_mutex, and holds
it until the ksm_memory_callback(MEM_OFFLINE): that appears to be a
problem because notifier callbacks are made under down_read of
blocking_notifier_head->rwsem (so first the mutex is taken while holding
the rwsem, then later the rwsem is taken while still holding the mutex);
but is not in fact a problem because mem_hotplug_mutex is held
throughout the dance.

There was an attempt to fix this with mutex_lock_nested(); but if that
happened to fool lockdep two years ago, apparently it does so no longer.

I had hoped to eradicate this issue in extending KSM page migration not
to need the ksm_thread_mutex.  But then realized that although the page
migration itself is safe, we do still need to lock out ksmd and other
users of get_ksm_page() while offlining memory - at some point between
MEM_GOING_OFFLINE and MEM_OFFLINE, the struct pages themselves may
vanish, and get_ksm_page()'s accesses to them become a violation.

So, give up on holding ksm_thread_mutex itself from MEM_GOING_OFFLINE to
MEM_OFFLINE, and add a KSM_RUN_OFFLINE flag, and wait_while_offlining()
checks, to achieve the same lockout without being caught by lockdep.
This is less elegant for KSM, but it's more important to keep lockdep
useful to other users - and I apologize for how long it took to fix.

Signed-off-by: default avatarHugh Dickins <hughd@google.com>
Reported-by: default avatarGerald Schaefer <gerald.schaefer@de.ibm.com>
Tested-by: default avatarGerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Petr Holasek <pholasek@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Izik Eidus <izik.eidus@ravellosystems.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 9c620e2b
Loading
Loading
Loading
Loading
+41 −14
Original line number Diff line number Diff line
@@ -226,7 +226,9 @@ static unsigned int ksm_merge_across_nodes = 1;
#define KSM_RUN_STOP	0
#define KSM_RUN_MERGE	1
#define KSM_RUN_UNMERGE	2
static unsigned int ksm_run = KSM_RUN_STOP;
#define KSM_RUN_OFFLINE	4
static unsigned long ksm_run = KSM_RUN_STOP;
static void wait_while_offlining(void);

static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
static DEFINE_MUTEX(ksm_thread_mutex);
@@ -1700,6 +1702,7 @@ static int ksm_scan_thread(void *nothing)

	while (!kthread_should_stop()) {
		mutex_lock(&ksm_thread_mutex);
		wait_while_offlining();
		if (ksmd_should_run())
			ksm_do_scan(ksm_thread_pages_to_scan);
		mutex_unlock(&ksm_thread_mutex);
@@ -2056,6 +2059,22 @@ void ksm_migrate_page(struct page *newpage, struct page *oldpage)
#endif /* CONFIG_MIGRATION */

#ifdef CONFIG_MEMORY_HOTREMOVE
static int just_wait(void *word)
{
	schedule();
	return 0;
}

static void wait_while_offlining(void)
{
	while (ksm_run & KSM_RUN_OFFLINE) {
		mutex_unlock(&ksm_thread_mutex);
		wait_on_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE),
				just_wait, TASK_UNINTERRUPTIBLE);
		mutex_lock(&ksm_thread_mutex);
	}
}

static void ksm_check_stable_tree(unsigned long start_pfn,
				  unsigned long end_pfn)
{
@@ -2098,15 +2117,15 @@ static int ksm_memory_callback(struct notifier_block *self,
	switch (action) {
	case MEM_GOING_OFFLINE:
		/*
		 * Keep it very simple for now: just lock out ksmd and
		 * MADV_UNMERGEABLE while any memory is going offline.
		 * mutex_lock_nested() is necessary because lockdep was alarmed
		 * that here we take ksm_thread_mutex inside notifier chain
		 * mutex, and later take notifier chain mutex inside
		 * ksm_thread_mutex to unlock it.   But that's safe because both
		 * are inside mem_hotplug_mutex.
		 */
		mutex_lock_nested(&ksm_thread_mutex, SINGLE_DEPTH_NESTING);
		 * Prevent ksm_do_scan(), unmerge_and_remove_all_rmap_items()
		 * and remove_all_stable_nodes() while memory is going offline:
		 * it is unsafe for them to touch the stable tree at this time.
		 * But unmerge_ksm_pages(), rmap lookups and other entry points
		 * which do not need the ksm_thread_mutex are all safe.
		 */
		mutex_lock(&ksm_thread_mutex);
		ksm_run |= KSM_RUN_OFFLINE;
		mutex_unlock(&ksm_thread_mutex);
		break;

	case MEM_OFFLINE:
@@ -2122,11 +2141,20 @@ static int ksm_memory_callback(struct notifier_block *self,
		/* fallthrough */

	case MEM_CANCEL_OFFLINE:
		mutex_lock(&ksm_thread_mutex);
		ksm_run &= ~KSM_RUN_OFFLINE;
		mutex_unlock(&ksm_thread_mutex);

		smp_mb();	/* wake_up_bit advises this */
		wake_up_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE));
		break;
	}
	return NOTIFY_OK;
}
#else
static void wait_while_offlining(void)
{
}
#endif /* CONFIG_MEMORY_HOTREMOVE */

#ifdef CONFIG_SYSFS
@@ -2189,7 +2217,7 @@ KSM_ATTR(pages_to_scan);
static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr,
			char *buf)
{
	return sprintf(buf, "%u\n", ksm_run);
	return sprintf(buf, "%lu\n", ksm_run);
}

static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
@@ -2212,6 +2240,7 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
	 */

	mutex_lock(&ksm_thread_mutex);
	wait_while_offlining();
	if (ksm_run != flags) {
		ksm_run = flags;
		if (flags & KSM_RUN_UNMERGE) {
@@ -2254,6 +2283,7 @@ static ssize_t merge_across_nodes_store(struct kobject *kobj,
		return -EINVAL;

	mutex_lock(&ksm_thread_mutex);
	wait_while_offlining();
	if (ksm_merge_across_nodes != knob) {
		if (ksm_pages_shared || remove_all_stable_nodes())
			err = -EBUSY;
@@ -2366,10 +2396,7 @@ static int __init ksm_init(void)
#endif /* CONFIG_SYSFS */

#ifdef CONFIG_MEMORY_HOTREMOVE
	/*
	 * Choose a high priority since the callback takes ksm_thread_mutex:
	 * later callbacks could only be taking locks which nest within that.
	 */
	/* There is no significance to this priority 100 */
	hotplug_memory_notifier(ksm_memory_callback, 100);
#endif
	return 0;