Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9661d5bc authored by Yasuaki Ishimatsu, committed by Ingo Molnar
Browse files

x86/mm/hotplug: Modify PGD entry when removing memory



When hot-adding or hot-removing memory, sync_global_pgds() is called
to synchronize the kernel PGD entries into the PGD of every process's
MM.  But when hot-removing memory, sync_global_pgds() does not work
correctly.

First, sync_global_pgds() checks whether the target PGD is none,
and if it is, the PGD is skipped.  But when hot-removing memory,
the PGD may be none because it may have been cleared by
free_pud_table().  So when sync_global_pgds() is called after
hot-removing memory, it should not skip the PGD even if the PGD
is none.  Instead, sync_global_pgds() must clear the corresponding
PGD entries of all processes' MMs.

Currently sync_global_pgds() does not clear the PGD entries of all
processes' MMs when hot-removing memory.  So when memory covering
the same range as previously removed memory is hot-added again,
the following call trace is shown:

 kernel BUG at arch/x86/mm/init_64.c:206!
 ...
 [<ffffffff815e0c80>] kernel_physical_mapping_init+0x1b2/0x1d2
 [<ffffffff815ced94>] init_memory_mapping+0x1d4/0x380
 [<ffffffff8104aebd>] arch_add_memory+0x3d/0xd0
 [<ffffffff815d03d9>] add_memory+0xb9/0x1b0
 [<ffffffff81352415>] acpi_memory_device_add+0x1af/0x28e
 [<ffffffff81325dc4>] acpi_bus_device_attach+0x8c/0xf0
 [<ffffffff813413b9>] acpi_ns_walk_namespace+0xc8/0x17f
 [<ffffffff81325d38>] ? acpi_bus_type_and_status+0xb7/0xb7
 [<ffffffff81325d38>] ? acpi_bus_type_and_status+0xb7/0xb7
 [<ffffffff813418ed>] acpi_walk_namespace+0x95/0xc5
 [<ffffffff81326b4c>] acpi_bus_scan+0x9a/0xc2
 [<ffffffff81326bff>] acpi_scan_bus_device_check+0x8b/0x12e
 [<ffffffff81326cb5>] acpi_scan_device_check+0x13/0x15
 [<ffffffff81320122>] acpi_os_execute_deferred+0x25/0x32
 [<ffffffff8107e02b>] process_one_work+0x17b/0x460
 [<ffffffff8107edfb>] worker_thread+0x11b/0x400
 [<ffffffff8107ece0>] ? rescuer_thread+0x400/0x400
 [<ffffffff81085aef>] kthread+0xcf/0xe0
 [<ffffffff81085a20>] ? kthread_create_on_node+0x140/0x140
 [<ffffffff815fc76c>] ret_from_fork+0x7c/0xb0
 [<ffffffff81085a20>] ? kthread_create_on_node+0x140/0x140

This patch clears the PGD entries of all processes' MMs when
sync_global_pgds() is called after hot-removing memory.

Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Acked-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Tang Chen <tangchen@cn.fujitsu.com>
Cc: Gu Zheng <guz.fnst@cn.fujitsu.com>
Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 5255e0a7
Loading
Loading
Loading
Loading
+2 −1
Original line number Original line Diff line number Diff line
@@ -115,7 +115,8 @@ static inline void native_pgd_clear(pgd_t *pgd)
	native_set_pgd(pgd, native_make_pgd(0));
	native_set_pgd(pgd, native_make_pgd(0));
}
}


extern void sync_global_pgds(unsigned long start, unsigned long end);
extern void sync_global_pgds(unsigned long start, unsigned long end,
			     int removed);


/*
/*
 * Conversion functions: convert a page and protection to a page entry,
 * Conversion functions: convert a page and protection to a page entry,
+1 −1
Original line number Original line Diff line number Diff line
@@ -350,7 +350,7 @@ static void dump_pagetable(unsigned long address)


void vmalloc_sync_all(void)
void vmalloc_sync_all(void)
{
{
	sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
	sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END, 0);
}
}


/*
/*
+19 −8
Original line number Original line Diff line number Diff line
@@ -178,7 +178,7 @@ __setup("noexec32=", nonx32_setup);
 * When memory was added/removed make sure all the processes MM have
 * When memory was added/removed make sure all the processes MM have
 * suitable PGD entries in the local PGD level page.
 * suitable PGD entries in the local PGD level page.
 */
 */
void sync_global_pgds(unsigned long start, unsigned long end)
void sync_global_pgds(unsigned long start, unsigned long end, int removed)
{
{
	unsigned long address;
	unsigned long address;


@@ -186,7 +186,12 @@ void sync_global_pgds(unsigned long start, unsigned long end)
		const pgd_t *pgd_ref = pgd_offset_k(address);
		const pgd_t *pgd_ref = pgd_offset_k(address);
		struct page *page;
		struct page *page;


		if (pgd_none(*pgd_ref))
		/*
		 * When it is called after memory hot remove, pgd_none()
		 * returns true. In this case (removed == 1), we must clear
		 * the PGD entries in the local PGD level page.
		 */
		if (pgd_none(*pgd_ref) && !removed)
			continue;
			continue;


		spin_lock(&pgd_lock);
		spin_lock(&pgd_lock);
@@ -199,12 +204,18 @@ void sync_global_pgds(unsigned long start, unsigned long end)
			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
			spin_lock(pgt_lock);
			spin_lock(pgt_lock);


			if (pgd_none(*pgd))
			if (!pgd_none(*pgd_ref) && !pgd_none(*pgd))
				set_pgd(pgd, *pgd_ref);
			else
				BUG_ON(pgd_page_vaddr(*pgd)
				BUG_ON(pgd_page_vaddr(*pgd)
				       != pgd_page_vaddr(*pgd_ref));
				       != pgd_page_vaddr(*pgd_ref));


			if (removed) {
				if (pgd_none(*pgd_ref) && !pgd_none(*pgd))
					pgd_clear(pgd);
			} else {
				if (pgd_none(*pgd))
					set_pgd(pgd, *pgd_ref);
			}

			spin_unlock(pgt_lock);
			spin_unlock(pgt_lock);
		}
		}
		spin_unlock(&pgd_lock);
		spin_unlock(&pgd_lock);
@@ -633,7 +644,7 @@ kernel_physical_mapping_init(unsigned long start,
	}
	}


	if (pgd_changed)
	if (pgd_changed)
		sync_global_pgds(addr, end - 1);
		sync_global_pgds(addr, end - 1, 0);


	__flush_tlb_all();
	__flush_tlb_all();


@@ -995,7 +1006,7 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
	}
	}


	if (pgd_changed)
	if (pgd_changed)
		sync_global_pgds(start, end - 1);
		sync_global_pgds(start, end - 1, 1);


	flush_tlb_all();
	flush_tlb_all();
}
}
@@ -1342,7 +1353,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
	else
	else
		err = vmemmap_populate_basepages(start, end, node);
		err = vmemmap_populate_basepages(start, end, node);
	if (!err)
	if (!err)
		sync_global_pgds(start, end - 1);
		sync_global_pgds(start, end - 1, 0);
	return err;
	return err;
}
}