Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 50aa98ba authored by Martin Schwidefsky's avatar Martin Schwidefsky
Browse files

[S390] fix recursive locking on page_table_lock



Suzuki Poulose reported the following recursive locking bug on s390:

Here is the stack trace : (see Appendix I for more info)

  [<0000000000406ed6>] _spin_lock+0x52/0x94
  [<0000000000103bde>] crst_table_free+0x14e/0x1a4
  [<00000000001ba684>] __pmd_alloc+0x114/0x1ec
  [<00000000001be8d0>] handle_mm_fault+0x2cc/0xb80
  [<0000000000407d62>] do_dat_exception+0x2b6/0x3a0
  [<0000000000114f8c>] sysc_return+0x0/0x8
  [<00000200001642b2>] 0x200001642b2

The page_table_lock is already acquired in __pmd_alloc (mm/memory.c) and
it tries to populate the pud/pgd with a new pmd allocated. If another
thread populates it before we get a chance, we free the pmd using
pmd_free().

On s390x, pmd_free(even pud_free ) is #defined to crst_table_free(),
which acquires the page_table_lock to protect the crst_table index updates.

Hence this ends up in a recursive locking of the page_table_lock.

The solution suggested by Dave Hansen is to use a new spin lock in the mmu
context to protect the access to the crst_list and the pgtable_list.

Reported-by: default avatarSuzuki Poulose <suzuki@in.ibm.com>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Signed-off-by: default avatarMartin Schwidefsky <schwidefsky@de.ibm.com>
parent c4de0c1a
Loading
Loading
Loading
Loading
+1 −0
Original line number Original line Diff line number Diff line
@@ -2,6 +2,7 @@
#define __MMU_H
#define __MMU_H


typedef struct {
typedef struct {
	spinlock_t list_lock;
	struct list_head crst_list;
	struct list_head crst_list;
	struct list_head pgtable_list;
	struct list_head pgtable_list;
	unsigned long asce_bits;
	unsigned long asce_bits;
+1 −0
Original line number Original line Diff line number Diff line
@@ -140,6 +140,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)


static inline pgd_t *pgd_alloc(struct mm_struct *mm)
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
{
	spin_lock_init(&mm->context.list_lock);
	INIT_LIST_HEAD(&mm->context.crst_list);
	INIT_LIST_HEAD(&mm->context.crst_list);
	INIT_LIST_HEAD(&mm->context.pgtable_list);
	INIT_LIST_HEAD(&mm->context.pgtable_list);
	return (pgd_t *) crst_table_alloc(mm, s390_noexec);
	return (pgd_t *) crst_table_alloc(mm, s390_noexec);
+12 −12
Original line number Original line Diff line number Diff line
@@ -78,9 +78,9 @@ unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
		}
		}
		page->index = page_to_phys(shadow);
		page->index = page_to_phys(shadow);
	}
	}
	spin_lock(&mm->page_table_lock);
	spin_lock(&mm->context.list_lock);
	list_add(&page->lru, &mm->context.crst_list);
	list_add(&page->lru, &mm->context.crst_list);
	spin_unlock(&mm->page_table_lock);
	spin_unlock(&mm->context.list_lock);
	return (unsigned long *) page_to_phys(page);
	return (unsigned long *) page_to_phys(page);
}
}


@@ -89,9 +89,9 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table)
	unsigned long *shadow = get_shadow_table(table);
	unsigned long *shadow = get_shadow_table(table);
	struct page *page = virt_to_page(table);
	struct page *page = virt_to_page(table);


	spin_lock(&mm->page_table_lock);
	spin_lock(&mm->context.list_lock);
	list_del(&page->lru);
	list_del(&page->lru);
	spin_unlock(&mm->page_table_lock);
	spin_unlock(&mm->context.list_lock);
	if (shadow)
	if (shadow)
		free_pages((unsigned long) shadow, ALLOC_ORDER);
		free_pages((unsigned long) shadow, ALLOC_ORDER);
	free_pages((unsigned long) table, ALLOC_ORDER);
	free_pages((unsigned long) table, ALLOC_ORDER);
@@ -182,7 +182,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
	unsigned long bits;
	unsigned long bits;


	bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
	bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
	spin_lock(&mm->page_table_lock);
	spin_lock(&mm->context.list_lock);
	page = NULL;
	page = NULL;
	if (!list_empty(&mm->context.pgtable_list)) {
	if (!list_empty(&mm->context.pgtable_list)) {
		page = list_first_entry(&mm->context.pgtable_list,
		page = list_first_entry(&mm->context.pgtable_list,
@@ -191,7 +191,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
			page = NULL;
			page = NULL;
	}
	}
	if (!page) {
	if (!page) {
		spin_unlock(&mm->page_table_lock);
		spin_unlock(&mm->context.list_lock);
		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
		if (!page)
		if (!page)
			return NULL;
			return NULL;
@@ -202,7 +202,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
			clear_table_pgstes(table);
			clear_table_pgstes(table);
		else
		else
			clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
			clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
		spin_lock(&mm->page_table_lock);
		spin_lock(&mm->context.list_lock);
		list_add(&page->lru, &mm->context.pgtable_list);
		list_add(&page->lru, &mm->context.pgtable_list);
	}
	}
	table = (unsigned long *) page_to_phys(page);
	table = (unsigned long *) page_to_phys(page);
@@ -213,7 +213,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
	page->flags |= bits;
	page->flags |= bits;
	if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
	if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
		list_move_tail(&page->lru, &mm->context.pgtable_list);
		list_move_tail(&page->lru, &mm->context.pgtable_list);
	spin_unlock(&mm->page_table_lock);
	spin_unlock(&mm->context.list_lock);
	return table;
	return table;
}
}


@@ -225,7 +225,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
	bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
	bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	spin_lock(&mm->page_table_lock);
	spin_lock(&mm->context.list_lock);
	page->flags ^= bits;
	page->flags ^= bits;
	if (page->flags & FRAG_MASK) {
	if (page->flags & FRAG_MASK) {
		/* Page now has some free pgtable fragments. */
		/* Page now has some free pgtable fragments. */
@@ -234,7 +234,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
	} else
	} else
		/* All fragments of the 4K page have been freed. */
		/* All fragments of the 4K page have been freed. */
		list_del(&page->lru);
		list_del(&page->lru);
	spin_unlock(&mm->page_table_lock);
	spin_unlock(&mm->context.list_lock);
	if (page) {
	if (page) {
		pgtable_page_dtor(page);
		pgtable_page_dtor(page);
		__free_page(page);
		__free_page(page);
@@ -245,7 +245,7 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
{
{
	struct page *page;
	struct page *page;


	spin_lock(&mm->page_table_lock);
	spin_lock(&mm->context.list_lock);
	/* Free shadow region and segment tables. */
	/* Free shadow region and segment tables. */
	list_for_each_entry(page, &mm->context.crst_list, lru)
	list_for_each_entry(page, &mm->context.crst_list, lru)
		if (page->index) {
		if (page->index) {
@@ -255,7 +255,7 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
	/* "Free" second halves of page tables. */
	/* "Free" second halves of page tables. */
	list_for_each_entry(page, &mm->context.pgtable_list, lru)
	list_for_each_entry(page, &mm->context.pgtable_list, lru)
		page->flags &= ~SECOND_HALVES;
		page->flags &= ~SECOND_HALVES;
	spin_unlock(&mm->page_table_lock);
	spin_unlock(&mm->context.list_lock);
	mm->context.noexec = 0;
	mm->context.noexec = 0;
	update_mm(mm, tsk);
	update_mm(mm, tsk);
}
}
+1 −0
Original line number Original line Diff line number Diff line
@@ -331,6 +331,7 @@ void __init vmem_map_init(void)
	unsigned long start, end;
	unsigned long start, end;
	int i;
	int i;


	spin_lock_init(&init_mm.context.list_lock);
	INIT_LIST_HEAD(&init_mm.context.crst_list);
	INIT_LIST_HEAD(&init_mm.context.crst_list);
	INIT_LIST_HEAD(&init_mm.context.pgtable_list);
	INIT_LIST_HEAD(&init_mm.context.pgtable_list);
	init_mm.context.noexec = 0;
	init_mm.context.noexec = 0;