
Commit f4ea6dcb authored by Aneesh Kumar K.V, committed by Michael Ellerman

powerpc/mm: Enable mappings above 128TB



Not all userspace applications are ready to handle wide addresses. It's
known that at least some JIT compilers use the higher bits of pointers to
encode their own information. That use collides with valid pointers once
addresses reach into the 512TB range and leads to crashes.

To mitigate this, we are not going to allocate virtual address space
above 128TB by default.

But userspace can ask for an allocation from the full address space by
specifying a hint address (with or without MAP_FIXED) above 128TB.

If a hint address above 128TB is given but MAP_FIXED is not specified, we
try to find an unmapped area at the specified address. If that area is
already occupied, we look for an unmapped area in the *full* address
space, rather than only in the 128TB window.

This approach makes it easy for an application's memory allocator to
become aware of the large address space without manually tracking the
allocated virtual address space.

This is a per-mmap decision, i.e. we can have some mmaps with larger
addresses and others that stay within the 128TB window.
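
As an illustration (not part of this commit), here is a minimal userspace
sketch of how an application opts in: pass a hint address above 128TB,
here an arbitrary 256TB value, to an ordinary mmap() call. mmaps without a
hint keep landing below 128TB.

  /*
   * Hypothetical example for illustration only; the 256TB hint is just an
   * arbitrary address above the default 128TB window.
   */
  #include <stdio.h>
  #include <sys/mman.h>

  int main(void)
  {
          size_t len = 1UL << 16;                 /* one 64K page */
          void *hint = (void *)(1UL << 48);       /* 256TB, above the 128TB window */

          /* No hint: the mapping stays below 128TB. */
          void *low = mmap(NULL, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

          /* Hint above 128TB, no MAP_FIXED: may be placed above 128TB. */
          void *high = mmap(hint, len, PROT_READ | PROT_WRITE,
                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

          printf("low  mapping at %p\n", low);
          printf("high mapping at %p\n", high);
          return 0;
  }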

A sample memory layout looks like:

  10000000-10010000 r-xp 00000000 fc:00 9057045          /home/max_addr_512TB
  10010000-10020000 r--p 00000000 fc:00 9057045          /home/max_addr_512TB
  10020000-10030000 rw-p 00010000 fc:00 9057045          /home/max_addr_512TB
  10029630000-10029660000 rw-p 00000000 00:00 0          [heap]
  7fff834a0000-7fff834b0000 rw-p 00000000 00:00 0
  7fff834b0000-7fff83670000 r-xp 00000000 fc:00 9177190  /lib/powerpc64le-linux-gnu/libc-2.23.so
  7fff83670000-7fff83680000 r--p 001b0000 fc:00 9177190  /lib/powerpc64le-linux-gnu/libc-2.23.so
  7fff83680000-7fff83690000 rw-p 001c0000 fc:00 9177190  /lib/powerpc64le-linux-gnu/libc-2.23.so
  7fff83690000-7fff836a0000 rw-p 00000000 00:00 0
  7fff836a0000-7fff836c0000 r-xp 00000000 00:00 0        [vdso]
  7fff836c0000-7fff83700000 r-xp 00000000 fc:00 9177193  /lib/powerpc64le-linux-gnu/ld-2.23.so
  7fff83700000-7fff83710000 r--p 00030000 fc:00 9177193  /lib/powerpc64le-linux-gnu/ld-2.23.so
  7fff83710000-7fff83720000 rw-p 00040000 fc:00 9177193  /lib/powerpc64le-linux-gnu/ld-2.23.so
  7fffdccf0000-7fffdcd20000 rw-p 00000000 00:00 0        [stack]
  1000000000000-1000000010000 rw-p 00000000 00:00 0
  1ffff83710000-1ffff83720000 rw-p 00000000 00:00 0

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Parent commit: fbfef902
+17 −6
@@ -114,7 +114,7 @@ void release_thread(struct task_struct *);
 /*
  * Max value currently used:
  */
-#define TASK_SIZE_USER64	TASK_SIZE_128TB
+#define TASK_SIZE_USER64	TASK_SIZE_512TB
 #else
 #define TASK_SIZE_USER64	TASK_SIZE_64TB
 #endif
@@ -128,26 +128,37 @@ void release_thread(struct task_struct *);
 #define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \
 		TASK_SIZE_USER32 : TASK_SIZE_USER64)
 #define TASK_SIZE	  TASK_SIZE_OF(current)
 
 /* This decides where the kernel will search for a free chunk of vm
  * space during mmap's.
  */
 #define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4))
-#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(TASK_SIZE_USER64 / 4))
+#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(TASK_SIZE_128TB / 4))
 
 #define TASK_UNMAPPED_BASE ((is_32bit_task()) ? \
 		TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64 )
 #endif
 
+/*
+ * Initial task size value for user applications. For book3s 64 we start
+ * with 128TB and conditionally enable upto 512TB
+ */
+#ifdef CONFIG_PPC_BOOK3S_64
+#define DEFAULT_MAP_WINDOW	((is_32bit_task()) ? \
+				 TASK_SIZE_USER32 : TASK_SIZE_128TB)
+#else
+#define DEFAULT_MAP_WINDOW	TASK_SIZE
+#endif
+
 #ifdef __powerpc64__
 
-#define STACK_TOP_USER64 TASK_SIZE_USER64
+/* Limit stack to 128TB */
+#define STACK_TOP_USER64 TASK_SIZE_128TB
 #define STACK_TOP_USER32 TASK_SIZE_USER32
 
 #define STACK_TOP (is_32bit_task() ? \
 		   STACK_TOP_USER32 : STACK_TOP_USER64)
 
-#define STACK_TOP_MAX STACK_TOP_USER64
+#define STACK_TOP_MAX TASK_SIZE_USER64
 
 #else /* __powerpc64__ */
 
+1 −1
@@ -923,7 +923,7 @@ void __init setup_arch(char **cmdline_p)
 
 #ifdef CONFIG_PPC_MM_SLICES
 #ifdef CONFIG_PPC64
-	init_mm.context.addr_limit = TASK_SIZE_USER64;
+	init_mm.context.addr_limit = TASK_SIZE_128TB;
 #else
 #error	"context.addr_limit not initialized."
 #endif
+7 −0
@@ -50,6 +50,9 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	struct hstate *h = hstate_file(file);
 	struct vm_unmapped_area_info info;
 
+	if (unlikely(addr > mm->context.addr_limit && addr < TASK_SIZE))
+		mm->context.addr_limit = TASK_SIZE;
+
 	if (len & ~huge_page_mask(h))
 		return -EINVAL;
 	if (len > mm->context.addr_limit)
@@ -78,5 +81,9 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	info.high_limit = current->mm->mmap_base;
 	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
 	info.align_offset = 0;
+
+	if (addr > DEFAULT_MAP_WINDOW)
+		info.high_limit += mm->context.addr_limit - DEFAULT_MAP_WINDOW;
+
 	return vm_unmapped_area(&info);
 }
+21 −11
@@ -79,7 +79,7 @@ static inline unsigned long mmap_base(unsigned long rnd)
 	else if (gap > MAX_GAP)
 		gap = MAX_GAP;
 
-	return PAGE_ALIGN(TASK_SIZE - gap - rnd);
+	return PAGE_ALIGN(DEFAULT_MAP_WINDOW - gap - rnd);
 }
 
 #ifdef CONFIG_PPC_RADIX_MMU
@@ -97,6 +97,9 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	struct vm_area_struct *vma;
 	struct vm_unmapped_area_info info;
 
+	if (unlikely(addr > mm->context.addr_limit && addr < TASK_SIZE))
+		mm->context.addr_limit = TASK_SIZE;
+
 	if (len > mm->context.addr_limit - mmap_min_addr)
 		return -ENOMEM;
 
@@ -114,8 +117,13 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	info.flags = 0;
 	info.length = len;
 	info.low_limit = mm->mmap_base;
-	info.high_limit = mm->context.addr_limit;
 	info.align_mask = 0;
+
+	if (unlikely(addr > DEFAULT_MAP_WINDOW))
+		info.high_limit = mm->context.addr_limit;
+	else
+		info.high_limit = DEFAULT_MAP_WINDOW;
+
 	return vm_unmapped_area(&info);
 }
 
@@ -131,6 +139,9 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
 	unsigned long addr = addr0;
 	struct vm_unmapped_area_info info;
 
+	if (unlikely(addr > mm->context.addr_limit && addr < TASK_SIZE))
+		mm->context.addr_limit = TASK_SIZE;
+
 	/* requested length too big for entire address space */
 	if (len > mm->context.addr_limit - mmap_min_addr)
 		return -ENOMEM;
@@ -152,7 +163,14 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
 	info.low_limit = max(PAGE_SIZE, mmap_min_addr);
 	info.high_limit = mm->mmap_base;
 	info.align_mask = 0;
+
+	if (addr > DEFAULT_MAP_WINDOW)
+		info.high_limit += mm->context.addr_limit - DEFAULT_MAP_WINDOW;
+
 	addr = vm_unmapped_area(&info);
+	if (!(addr & ~PAGE_MASK))
+		return addr;
+	VM_BUG_ON(addr != -ENOMEM);
 
 	/*
 	 * A failed mmap() very likely causes application failure,
@@ -160,15 +178,7 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
 	 * can happen with large stack limits and large mmap()
 	 * allocations.
 	 */
-	if (addr & ~PAGE_MASK) {
-		VM_BUG_ON(addr != -ENOMEM);
-		info.flags = 0;
-		info.low_limit = TASK_UNMAPPED_BASE;
-		info.high_limit = mm->context.addr_limit;
-		addr = vm_unmapped_area(&info);
-	}
-
-	return addr;
+	return radix__arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
 }
 
 static void radix__arch_pick_mmap_layout(struct mm_struct *mm,
+1 −1
@@ -99,7 +99,7 @@ static int hash__init_new_context(struct mm_struct *mm)
 	 * mm->context.addr_limit. Default to max task size so that we copy the
 	 * default values to paca which will help us to handle slb miss early.
 	 */
-	mm->context.addr_limit = TASK_SIZE_USER64;
+	mm->context.addr_limit = TASK_SIZE_128TB;
 
 	/*
 	 * The old code would re-promote on fork, we don't do that when using