Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 4c21e2f2 authored by Hugh Dickins, committed by Linus Torvalds
Browse files

[PATCH] mm: split page table lock



Christoph Lameter demonstrated very poor scalability on the SGI 512-way, with
a many-threaded application which concurrently initializes different parts of
a large anonymous area.

This patch corrects that, by using a separate spinlock per page table page, to
guard the page table entries in that page, instead of using the mm's single
page_table_lock.  (But even then, page_table_lock is still used to guard page
table allocation, and anon_vma allocation.)

In this implementation, the spinlock is tucked inside the struct page of the
page table page: with a BUILD_BUG_ON in case it overflows - which it would in
the case of 32-bit PA-RISC with spinlock debugging enabled.

Splitting the lock is not quite for free: another cacheline access.  Ideally,
I suppose we would use split ptlock only for multi-threaded processes on
multi-cpu machines; but deciding that dynamically would have its own costs.
So for now enable it by config, at some number of cpus - since the Kconfig
language doesn't support inequalities, let preprocessor compare that with
NR_CPUS.  But I don't think it's worth being user-configurable: for good
testing of both split and unsplit configs, split now at 4 cpus, and perhaps
change that to 8 later.

There is a benefit even for singly threaded processes: kswapd can be attacking
one part of the mm while another part is busy faulting.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent b38c6845
Loading
Loading
Loading
Loading
+1 −0
Original line number Original line Diff line number Diff line
@@ -229,6 +229,7 @@ void free_pgd_slow(pgd_t *pgd)
	pte = pmd_page(*pmd);
	pte = pmd_page(*pmd);
	pmd_clear(pmd);
	pmd_clear(pmd);
	dec_page_state(nr_page_table_pages);
	dec_page_state(nr_page_table_pages);
	pte_lock_deinit(pte);
	pte_free(pte);
	pte_free(pte);
	pmd_free(pmd);
	pmd_free(pmd);
free:
free:
+2 −2
Original line number Original line Diff line number Diff line
@@ -87,14 +87,14 @@ static inline void pgd_list_add(pgd_t *pgd)
	if (pgd_list)
	if (pgd_list)
		pgd_list->private = (unsigned long) &page->index;
		pgd_list->private = (unsigned long) &page->index;
	pgd_list = page;
	pgd_list = page;
	page->private = (unsigned long) &pgd_list;
	set_page_private(page, (unsigned long)&pgd_list);
}
}


static inline void pgd_list_del(pgd_t *pgd)
static inline void pgd_list_del(pgd_t *pgd)
{
{
	struct page *next, **pprev, *page = virt_to_page(pgd);
	struct page *next, **pprev, *page = virt_to_page(pgd);
	next = (struct page *) page->index;
	next = (struct page *) page->index;
	pprev = (struct page **) page->private;
	pprev = (struct page **)page_private(page);
	*pprev = next;
	*pprev = next;
	if (next)
	if (next)
		next->private = (unsigned long) pprev;
		next->private = (unsigned long) pprev;
+4 −4
Original line number Original line Diff line number Diff line
@@ -188,19 +188,19 @@ static inline void pgd_list_add(pgd_t *pgd)
	struct page *page = virt_to_page(pgd);
	struct page *page = virt_to_page(pgd);
	page->index = (unsigned long)pgd_list;
	page->index = (unsigned long)pgd_list;
	if (pgd_list)
	if (pgd_list)
		pgd_list->private = (unsigned long)&page->index;
		set_page_private(pgd_list, (unsigned long)&page->index);
	pgd_list = page;
	pgd_list = page;
	page->private = (unsigned long)&pgd_list;
	set_page_private(page, (unsigned long)&pgd_list);
}
}


static inline void pgd_list_del(pgd_t *pgd)
static inline void pgd_list_del(pgd_t *pgd)
{
{
	struct page *next, **pprev, *page = virt_to_page(pgd);
	struct page *next, **pprev, *page = virt_to_page(pgd);
	next = (struct page *)page->index;
	next = (struct page *)page->index;
	pprev = (struct page **)page->private;
	pprev = (struct page **)page_private(page);
	*pprev = next;
	*pprev = next;
	if (next)
	if (next)
		next->private = (unsigned long)pprev;
		set_page_private(next, (unsigned long)pprev);
}
}


void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
+1 −0
Original line number Original line Diff line number Diff line
@@ -144,6 +144,7 @@ void destroy_context_skas(struct mm_struct *mm)


	if(!proc_mm || !ptrace_faultinfo){
	if(!proc_mm || !ptrace_faultinfo){
		free_page(mmu->id.stack);
		free_page(mmu->id.stack);
		pte_lock_deinit(virt_to_page(mmu->last_page_table));
		pte_free_kernel((pte_t *) mmu->last_page_table);
		pte_free_kernel((pte_t *) mmu->last_page_table);
                dec_page_state(nr_page_table_pages);
                dec_page_state(nr_page_table_pages);
#ifdef CONFIG_3_LEVEL_PGTABLES
#ifdef CONFIG_3_LEVEL_PGTABLES
+2 −2
Original line number Original line Diff line number Diff line
@@ -291,8 +291,8 @@ static int afs_file_releasepage(struct page *page, gfp_t gfp_flags)
		cachefs_uncache_page(vnode->cache, page);
		cachefs_uncache_page(vnode->cache, page);
#endif
#endif


		pageio = (struct cachefs_page *) page->private;
		pageio = (struct cachefs_page *) page_private(page);
		page->private = 0;
		set_page_private(page, 0);
		ClearPagePrivate(page);
		ClearPagePrivate(page);


		if (pageio)
		if (pageio)
Loading