Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5fcf7bb7 authored by Hugh Dickins's avatar Hugh Dickins Committed by Linus Torvalds
Browse files

[PATCH] read_zero_pagealigned() locking fix



Ramiro Voicu hits the BUG_ON(!pte_none(*pte)) in zeromap_pte_range: kernel
bugzilla 7645.  Right: read_zero_pagealigned uses down_read of mmap_sem,
but another thread's racing read of /dev/zero, or a normal fault, can
easily set that pte again, in between zap_page_range and zeromap_page_range
getting there.  It's been wrong ever since 2.4.3.

The simple fix is to use down_write instead, but that would serialize reads
of /dev/zero more than at present: perhaps some app would be badly
affected.  So instead let zeromap_page_range return the error instead of
BUG_ON, and read_zero_pagealigned break to the slower clear_user loop in
that case - there's no need to optimize for it.

Use -EEXIST for when a pte is found: BUG_ON in mmap_zero (the other user of
zeromap_page_range), though it really isn't interesting there.  And since
mmap_zero wants -EAGAIN for out-of-memory, the zeromaps better return that
than -ENOMEM.

Signed-off-by: default avatarHugh Dickins <hugh@veritas.com>
Cc: Ramiro Voicu: <Ramiro.Voicu@cern.ch>
Cc: <stable@kernel.org>
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent 347a00fb
Loading
Loading
Loading
Loading
+8 −4
Original line number Diff line number Diff line
@@ -646,7 +646,8 @@ static inline size_t read_zero_pagealigned(char __user * buf, size_t size)
			count = size;

		zap_page_range(vma, addr, count, NULL);
        	zeromap_page_range(vma, addr, count, PAGE_COPY);
        	if (zeromap_page_range(vma, addr, count, PAGE_COPY))
			break;

		size -= count;
		buf += count;
@@ -713,11 +714,14 @@ static ssize_t read_zero(struct file * file, char __user * buf,

static int mmap_zero(struct file * file, struct vm_area_struct * vma)
{
	int err;

	if (vma->vm_flags & VM_SHARED)
		return shmem_zero_setup(vma);
	if (zeromap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, vma->vm_page_prot))
		return -EAGAIN;
	return 0;
	err = zeromap_page_range(vma, vma->vm_start,
			vma->vm_end - vma->vm_start, vma->vm_page_prot);
	BUG_ON(err == -EEXIST);
	return err;
}
#else /* CONFIG_MMU */
static ssize_t read_zero(struct file * file, char * buf, 
+21 −11
Original line number Diff line number Diff line
@@ -1110,23 +1110,29 @@ static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
{
	pte_t *pte;
	spinlock_t *ptl;
	int err = 0;

	pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
	if (!pte)
		return -ENOMEM;
		return -EAGAIN;
	arch_enter_lazy_mmu_mode();
	do {
		struct page *page = ZERO_PAGE(addr);
		pte_t zero_pte = pte_wrprotect(mk_pte(page, prot));

		if (unlikely(!pte_none(*pte))) {
			err = -EEXIST;
			pte++;
			break;
		}
		page_cache_get(page);
		page_add_file_rmap(page);
		inc_mm_counter(mm, file_rss);
		BUG_ON(!pte_none(*pte));
		set_pte_at(mm, addr, pte, zero_pte);
	} while (pte++, addr += PAGE_SIZE, addr != end);
	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(pte - 1, ptl);
	return 0;
	return err;
}

static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
@@ -1134,16 +1140,18 @@ static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
{
	pmd_t *pmd;
	unsigned long next;
	int err;

	pmd = pmd_alloc(mm, pud, addr);
	if (!pmd)
		return -ENOMEM;
		return -EAGAIN;
	do {
		next = pmd_addr_end(addr, end);
		if (zeromap_pte_range(mm, pmd, addr, next, prot))
			return -ENOMEM;
		err = zeromap_pte_range(mm, pmd, addr, next, prot);
		if (err)
			break;
	} while (pmd++, addr = next, addr != end);
	return 0;
	return err;
}

static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
@@ -1151,16 +1159,18 @@ static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
{
	pud_t *pud;
	unsigned long next;
	int err;

	pud = pud_alloc(mm, pgd, addr);
	if (!pud)
		return -ENOMEM;
		return -EAGAIN;
	do {
		next = pud_addr_end(addr, end);
		if (zeromap_pmd_range(mm, pud, addr, next, prot))
			return -ENOMEM;
		err = zeromap_pmd_range(mm, pud, addr, next, prot);
		if (err)
			break;
	} while (pud++, addr = next, addr != end);
	return 0;
	return err;
}

int zeromap_page_range(struct vm_area_struct *vma,