
Commit 88a78dc2 authored by Peter Zijlstra, committed by Gerrit - the friendly Code Review server

mm: VMA sequence count



Wrap the VMA modifications (vma_adjust/unmap_page_range) with sequence
counts so that we can easily test whether a VMA has changed.

The unmap_page_range() one lets us make assumptions about page tables:
when we find the seqcount hasn't changed, we can assume the page tables
are still valid.

The flip side is that we cannot distinguish a vma_adjust() from an
unmap_page_range(); with the former we could have re-checked the VMA
bounds against the faulting address.
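
For readers new to sequence counts: below is a minimal, self-contained
sketch of the generic seqcount pattern this patch applies per-VMA. The
writer/reader function names and the lo/hi variables are illustrative
only; the API calls are the kernel's <linux/seqlock.h> primitives.

#include <linux/seqlock.h>

static seqcount_t sc = SEQCNT_ZERO(sc);
static unsigned long lo, hi;	/* stand-ins for a VMA's vm_start/vm_end */

/* Writer: must already be serialized externally, as VMA writers are by
 * mmap_sem held for writing. */
static void update_bounds(unsigned long start, unsigned long end)
{
	write_seqcount_begin(&sc);	/* sequence becomes odd */
	lo = start;
	hi = end;
	write_seqcount_end(&sc);	/* sequence becomes even again */
}

/* Reader: lockless; loops only if a writer ran concurrently. */
static bool contains(unsigned long addr)
{
	unsigned int seq;
	bool ret;

	do {
		seq = read_seqcount_begin(&sc);
		ret = addr >= lo && addr < hi;
	} while (read_seqcount_retry(&sc, seq));

	return ret;
}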

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>

[Port to 4.12 kernel]
[Build depends on CONFIG_SPECULATIVE_PAGE_FAULT]
[Introduce vm_write_* inline functions depending on
 CONFIG_SPECULATIVE_PAGE_FAULT]
[Fix lock dependency between mapping->i_mmap_rwsem and vma->vm_sequence by
 using vm_raw_write* functions]
[Fix a lock dependency warning in mmap_region() when entering the error
 path]
[Move sequence initialisation into INIT_VMA()]
Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Change-Id: Ibc23ef3b9dbb80323c0f24cb06da34b4c3a8fa71
Patch-mainline: linux-mm @ 17 Apr 2018 16:33:14
[vinmenon@codeaurora.org: trivial merge conflict fixes]
Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
[charante@codeaurora.org: trivial merge conflict fixes]
Signed-off-by: Charan Teja Reddy <charante@codeaurora.org>
parent ead04c98
+44 −0
@@ -456,6 +456,9 @@ struct vm_operations_struct {
 static inline void INIT_VMA(struct vm_area_struct *vma)
 {
 	INIT_LIST_HEAD(&vma->anon_vma_chain);
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+	seqcount_init(&vma->vm_sequence);
+#endif
 }
 
 static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
@@ -1401,6 +1404,47 @@ int follow_phys(struct vm_area_struct *vma, unsigned long address,
 int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
 			void *buf, int len, int write);
 
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+static inline void vm_write_begin(struct vm_area_struct *vma)
+{
+	write_seqcount_begin(&vma->vm_sequence);
+}
+static inline void vm_write_begin_nested(struct vm_area_struct *vma,
+					 int subclass)
+{
+	write_seqcount_begin_nested(&vma->vm_sequence, subclass);
+}
+static inline void vm_write_end(struct vm_area_struct *vma)
+{
+	write_seqcount_end(&vma->vm_sequence);
+}
+static inline void vm_raw_write_begin(struct vm_area_struct *vma)
+{
+	raw_write_seqcount_begin(&vma->vm_sequence);
+}
+static inline void vm_raw_write_end(struct vm_area_struct *vma)
+{
+	raw_write_seqcount_end(&vma->vm_sequence);
+}
+#else
+static inline void vm_write_begin(struct vm_area_struct *vma)
+{
+}
+static inline void vm_write_begin_nested(struct vm_area_struct *vma,
+					 int subclass)
+{
+}
+static inline void vm_write_end(struct vm_area_struct *vma)
+{
+}
+static inline void vm_raw_write_begin(struct vm_area_struct *vma)
+{
+}
+static inline void vm_raw_write_end(struct vm_area_struct *vma)
+{
+}
+#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */
+
 extern void truncate_pagecache(struct inode *inode, loff_t new);
 extern void truncate_setsize(struct inode *inode, loff_t newsize);
 void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);
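
With these helpers in place, a later caller that modifies a VMA while
holding mmap_sem for writing brackets the update so speculative readers
can detect it. A sketch (the mprotect-style modification shown is
illustrative, not part of this patch):

	vm_write_begin(vma);
	vma->vm_flags = newflags;			/* example change */
	vma->vm_page_prot = vm_get_page_prot(newflags);
	vm_write_end(vma);

Note the empty !CONFIG_SPECULATIVE_PAGE_FAULT stubs above: call sites
can be annotated unconditionally and the helpers compile away when the
feature is disabled.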
+3 −0
@@ -330,6 +330,9 @@ struct vm_area_struct {
 	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
 #endif
 	struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
+#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
+	seqcount_t vm_sequence;
+#endif
 } __randomize_layout;
 
 struct core_thread {
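
For reference, the embedded type is tiny; simplified here from
include/linux/seqlock.h of this era (the lockdep map only exists under
CONFIG_DEBUG_LOCK_ALLOC):

typedef struct seqcount {
	unsigned sequence;	/* odd while a write is in progress */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map dep_map;
#endif
} seqcount_t;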
+2 −0
@@ -1502,6 +1502,7 @@ void unmap_page_range(struct mmu_gather *tlb,
 	unsigned long next;
 
 	BUG_ON(addr >= end);
+	vm_write_begin(vma);
 	tlb_start_vma(tlb, vma);
 	pgd = pgd_offset(vma->vm_mm, addr);
 	do {
@@ -1511,6 +1512,7 @@ void unmap_page_range(struct mmu_gather *tlb,
 		next = zap_p4d_range(tlb, vma, pgd, addr, next, details);
 	} while (pgd++, addr = next, addr != end);
 	tlb_end_vma(tlb, vma);
+	vm_write_end(vma);
 }
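
The matching read side lands in the later speculative-fault patches;
vma_has_changed() and handle_speculative_fault() are referenced in this
series but not introduced here. A hedged sketch of how such a reader
could validate a lockless page-table walk:

	/* Sketch only; not code from this patch. */
	unsigned int seq = raw_read_seqcount(&vma->vm_sequence);

	if (seq & 1)			/* a writer is mid-update */
		return VM_FAULT_RETRY;
	/* ... walk the page tables and sample the pte locklessly ... */
	if (read_seqcount_retry(&vma->vm_sequence, seq))
		return VM_FAULT_RETRY;	/* unmap/adjust ran; pte may be stale */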


+31 −0
@@ -701,6 +701,30 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	long adjust_next = 0;
 	int remove_next = 0;
 
+	/*
+	 * Why use the vm_raw_write*() functions here to avoid lockdep's warning?
+	 *
+	 * Lockdep is complaining about a theoretical lock dependency involving
+	 * 3 locks:
+	 *   mapping->i_mmap_rwsem --> vma->vm_sequence --> fs_reclaim
+	 *
+	 * Here are the major paths leading to this dependency:
+	 *  1. __vma_adjust() mmap_sem -> vm_sequence -> i_mmap_rwsem
+	 *  2. move_vma() mmap_sem -> vm_sequence -> fs_reclaim
+	 *  3. __alloc_pages_nodemask() fs_reclaim -> i_mmap_rwsem
+	 *  4. unmap_mapping_range() i_mmap_rwsem -> vm_sequence
+	 *
+	 * So there is no way to solve this easily, especially because in
+	 * unmap_mapping_range() the i_mmap_rwsem is grabbed while the impacted
+	 * VMAs are not yet known.
+	 * However, the way vm_sequence is used guarantees that we will never
+	 * block on it: we only check its value and never wait for it to move;
+	 * see vma_has_changed() and handle_speculative_fault().
+	 */
+	vm_raw_write_begin(vma);
+	if (next)
+		vm_raw_write_begin(next);
+
 	if (next && !insert) {
 		struct vm_area_struct *exporter = NULL, *importer = NULL;
 
@@ -911,6 +935,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 			anon_vma_merge(vma, next);
 		mm->map_count--;
 		mpol_put(vma_policy(next));
+		vm_raw_write_end(next);
 		vm_area_free(next);
 		/*
 		 * In mprotect's case 6 (see comments on vma_merge),
@@ -925,6 +950,8 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
			 * "vma->vm_next" gap must be updated.
			 */
			next = vma->vm_next;
			if (next)
				vm_raw_write_begin(next);
		} else {
			/*
			 * For the scope of the comment "next" and
@@ -971,6 +998,10 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
	if (insert && file)
		uprobe_mmap(insert);

	if (next && next != vma)
		vm_raw_write_end(next);
	vm_raw_write_end(vma);

	validate_mm(mm);

	return 0;
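
On the raw-vs-plain choice documented above: the two write-side variants
differ only in lockdep instrumentation. Simplified here from
include/linux/seqlock.h of this era; skipping seqcount_acquire() is what
silences the false-positive dependency chain while leaving the
memory-ordering semantics seen by readers identical:

static inline void raw_write_seqcount_begin(seqcount_t *s)
{
	s->sequence++;		/* counter goes odd: write in progress */
	smp_wmb();		/* order the bump before the VMA stores */
}

static inline void write_seqcount_begin(seqcount_t *s)
{
	raw_write_seqcount_begin(s);
	/* The lockdep hook the raw variant deliberately skips: */
	seqcount_acquire(&s->dep_map, 0, 0, _RET_IP_);
}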