Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7d485f64 authored by Ard Biesheuvel, committed by Russell King
Browse files

ARM: 8220/1: allow modules outside of bl range



Loading modules far away from the kernel in memory is problematic
because the 'bl' instruction only has limited reach, and modules are not
built with PLTs. Instead of using the -mlong-calls option (which affects
all compiler emitted bl instructions, but not the ones in assembler),
this patch allocates some additional space at module load time, and
populates it with PLT like veneers when encountering relocations that
are out of range.

This should work with all relocations against symbols exported by the
kernel, including those resulting from GCC generated implicit function
calls for ftrace etc.

The module memory size increases by about 5% on average, regardless of
whether any PLT entries were actually needed. However, due to the page
based rounding that occurs when allocating module memory, the average
memory footprint increase is negligible.

Reviewed-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
parent e748994f
Loading
Loading
Loading
Loading
+16 −1
Original line number Original line Diff line number Diff line
@@ -60,7 +60,7 @@ config ARM
	select HAVE_KPROBES if !XIP_KERNEL
	select HAVE_KPROBES if !XIP_KERNEL
	select HAVE_KRETPROBES if (HAVE_KPROBES)
	select HAVE_KRETPROBES if (HAVE_KPROBES)
	select HAVE_MEMBLOCK
	select HAVE_MEMBLOCK
	select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND
	select HAVE_MOD_ARCH_SPECIFIC
	select HAVE_OPROFILE if (HAVE_PERF_EVENTS)
	select HAVE_OPROFILE if (HAVE_PERF_EVENTS)
	select HAVE_OPTPROBES if !THUMB2_KERNEL
	select HAVE_OPTPROBES if !THUMB2_KERNEL
	select HAVE_PERF_EVENTS
	select HAVE_PERF_EVENTS
@@ -1681,6 +1681,21 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE
config ARCH_WANT_GENERAL_HUGETLB
config ARCH_WANT_GENERAL_HUGETLB
	def_bool y
	def_bool y


config ARM_MODULE_PLTS
	bool "Use PLTs to allow module memory to spill over into vmalloc area"
	depends on MODULES
	help
	  Allocate PLTs when loading modules so that jumps and calls whose
	  targets are too far away for their relative offsets to be encoded
	  in the instructions themselves can be bounced via veneers in the
	  module's PLT. This allows modules to be allocated in the generic
	  vmalloc area after the dedicated module memory area has been
	  exhausted. The modules will use slightly more memory, but after
	  rounding up to page size, the actual memory footprint is usually
	  the same.

	  Say y if you are getting out of memory errors while loading modules

source "mm/Kconfig"
source "mm/Kconfig"


config FORCE_MAX_ZONEORDER
config FORCE_MAX_ZONEORDER
+4 −0
Original line number Original line Diff line number Diff line
@@ -19,6 +19,10 @@ LDFLAGS_vmlinux += --be8
LDFLAGS_MODULE	+= --be8
LDFLAGS_MODULE	+= --be8
endif
endif


ifeq ($(CONFIG_ARM_MODULE_PLTS),y)
LDFLAGS_MODULE	+= -T $(srctree)/arch/arm/kernel/module.lds
endif

OBJCOPYFLAGS	:=-O binary -R .comment -S
OBJCOPYFLAGS	:=-O binary -R .comment -S
GZFLAGS		:=-9
GZFLAGS		:=-9
#KBUILD_CFLAGS	+=-pipe
#KBUILD_CFLAGS	+=-pipe
+11 −1
Original line number Original line Diff line number Diff line
@@ -16,11 +16,21 @@ enum {
	ARM_SEC_UNLIKELY,
	ARM_SEC_UNLIKELY,
	ARM_SEC_MAX,
	ARM_SEC_MAX,
};
};
#endif


struct mod_arch_specific {
struct mod_arch_specific {
#ifdef CONFIG_ARM_UNWIND
	struct unwind_table *unwind[ARM_SEC_MAX];
	struct unwind_table *unwind[ARM_SEC_MAX];
};
#endif
#endif
#ifdef CONFIG_ARM_MODULE_PLTS
	struct elf32_shdr   *core_plt;
	struct elf32_shdr   *init_plt;
	int		    core_plt_count;
	int		    init_plt_count;
#endif
};

u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val);


/*
/*
 * Add the ARM architecture version to the version magic string
 * Add the ARM architecture version to the version magic string
+1 −0
Original line number Original line Diff line number Diff line
@@ -34,6 +34,7 @@ obj-$(CONFIG_CPU_IDLE) += cpuidle.o
obj-$(CONFIG_ISA_DMA_API)	+= dma.o
obj-$(CONFIG_ISA_DMA_API)	+= dma.o
obj-$(CONFIG_FIQ)		+= fiq.o fiqasm.o
obj-$(CONFIG_FIQ)		+= fiq.o fiqasm.o
obj-$(CONFIG_MODULES)		+= armksyms.o module.o
obj-$(CONFIG_MODULES)		+= armksyms.o module.o
obj-$(CONFIG_ARM_MODULE_PLTS)	+= module-plts.o
obj-$(CONFIG_ISA_DMA)		+= dma-isa.o
obj-$(CONFIG_ISA_DMA)		+= dma-isa.o
obj-$(CONFIG_PCI)		+= bios32.o isa.o
obj-$(CONFIG_PCI)		+= bios32.o isa.o
obj-$(CONFIG_ARM_CPU_SUSPEND)	+= sleep.o suspend.o
obj-$(CONFIG_ARM_CPU_SUSPEND)	+= sleep.o suspend.o
+181 −0
Original line number Original line Diff line number Diff line
/*
 * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/elf.h>
#include <linux/kernel.h>
#include <linux/module.h>

#include <asm/cache.h>
#include <asm/opcodes.h>

/*
 * PLT entries are stored in groups (struct plt_entries): a run of
 * PLT_ENT_COUNT 'ldr pc' instructions followed by a run of PLT_ENT_COUNT
 * literal words holding the branch targets.
 *
 * PLT_ENT_STRIDE - size in bytes of each run, and therefore the distance
 *                  between instruction i and its literal i.
 * PLT_ENT_COUNT  - number of 32-bit slots in one run, i.e. entries per group.
 * PLT_ENT_SIZE   - bytes of PLT space consumed per entry (one instruction
 *                  word plus one literal word).
 */
#define PLT_ENT_STRIDE		L1_CACHE_BYTES
#define PLT_ENT_COUNT		(PLT_ENT_STRIDE / sizeof(u32))
#define PLT_ENT_SIZE		(sizeof(struct plt_entries) / PLT_ENT_COUNT)

/*
 * PLT_ENT_LDR is the veneer instruction: a PC-relative 'ldr pc, [pc, #imm]'
 * that loads the literal located PLT_ENT_STRIDE bytes after the instruction.
 * The immediate is reduced by the amount the PC reads ahead of the executing
 * instruction (4 bytes in Thumb state, 8 bytes in ARM state).
 */
#ifdef CONFIG_THUMB2_KERNEL
#define PLT_ENT_LDR		__opcode_to_mem_thumb32(0xf8dff000 | \
							(PLT_ENT_STRIDE - 4))
#else
#define PLT_ENT_LDR		__opcode_to_mem_arm(0xe59ff000 | \
						    (PLT_ENT_STRIDE - 8))
#endif

/*
 * One group of PLT_ENT_COUNT veneers. ldr[i] is the jump instruction for
 * entry i and lit[i] is the address it loads into the PC. The lookup code
 * in this file treats a literal of 0 as an unused entry.
 */
struct plt_entries {
	u32	ldr[PLT_ENT_COUNT];	/* veneer instructions (PLT_ENT_LDR) */
	u32	lit[PLT_ENT_COUNT];	/* branch targets, one per instruction */
};

/*
 * Report whether @addr falls inside the init region of @mod, i.e. within
 * init_size bytes starting at module_init.
 *
 * The subtraction is done in unsigned 32-bit arithmetic, so an address below
 * the start of the region wraps to a large offset and fails the comparison;
 * one test therefore covers both bounds.
 */
static bool in_init(const struct module *mod, u32 addr)
{
	u32 init_start = (u32)mod->module_init;
	u32 offset = addr - init_start;

	return offset < mod->init_size;
}

/*
 * Return the address of a veneer in @mod's PLT that branches to @val,
 * allocating a new entry when no existing one targets @val.
 *
 * @mod: module whose PLT sections are used
 * @loc: address of the place being relocated; picks the init PLT when
 *       in_init() says it lies in the module's init region, otherwise the
 *       core PLT
 * @val: branch target the veneer must reach
 *
 * Entries are handed out front to back. A literal slot containing 0 is
 * considered free, so the first free slot found during the search is claimed
 * for @val. Running off the end of the PLT section ends in BUG().
 */
u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
{
	bool init = in_init(mod, loc);
	struct elf32_shdr *sec = init ? mod->arch.init_plt
				      : mod->arch.core_plt;
	int *used = init ? &mod->arch.init_plt_count
			 : &mod->arch.core_plt_count;
	struct plt_entries *group = (void *)sec->sh_addr;
	struct plt_entries *limit = (void *)group + sec->sh_size;
	int remaining = *used;

	/* Walk the groups looking for an entry that already targets 'val' */
	while (group < limit) {
		int slot;

		if (remaining == 0) {
			/*
			 * Every earlier group is full and this one is still
			 * untouched: fill in all of its instructions at once
			 * and claim the first literal for 'val'.
			 */
			*group = (struct plt_entries){
				.ldr = { [0 ... PLT_ENT_COUNT - 1] = PLT_ENT_LDR, },
				.lit = { val, },
			};
			(*used)++;
			return (u32)&group->ldr[0];
		}

		for (slot = 0; slot < PLT_ENT_COUNT; slot++) {
			u32 *lit = &group->lit[slot];

			/* A zero literal marks the first free slot: take it */
			if (*lit == 0) {
				*lit = val;
				(*used)++;
			}
			if (*lit == val)
				return (u32)&group->ldr[slot];
		}

		remaining -= PLT_ENT_COUNT;
		group++;
	}
	BUG();
}

/*
 * Check whether relocation @num duplicates one of the relocations before it.
 *
 * @base: address the relocations' r_offset values are relative to
 * @rel:  relocation table
 * @num:  index of the relocation under test; only entries [0, @num) are
 *        compared against it
 * @mask: bits of the relocated instruction word that must agree
 *
 * Returns 1 if an earlier relocation has the same r_info and the words at
 * both places are equal under @mask, 0 otherwise.
 */
static int duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num,
			   u32 mask)
{
	int idx;

	for (idx = 0; idx < num; idx++) {
		const Elf32_Rel *earlier = &rel[idx];
		const Elf32_Rel *current = &rel[num];
		const u32 *earlier_place, *current_place;

		if (earlier->r_info == current->r_info) {
			/*
			 * Same relocation type against the same symbol is not
			 * enough: the addend stored in the place may differ
			 * and would then need its own PLT entry. Compare the
			 * masked contents of both places as well.
			 */
			earlier_place = (u32 *)(base + earlier->r_offset);
			current_place = (u32 *)(base + current->r_offset);
			if (!((*earlier_place ^ *current_place) & mask))
				return 1;
		}
	}
	return 0;
}

/*
 * Count how many PLT entries the @num relocations in @rel may need.
 *
 * Only ARM and Thumb call/jump relocations are counted, and a relocation
 * that duplicate_rel() reports as a repeat of an earlier one is counted
 * once. @base is the address the relocation offsets are relative to.
 */
static unsigned int count_plts(Elf32_Addr base, const Elf32_Rel *rel, int num)
{
	unsigned int needed = 0;
	int idx;

	/*
	 * The duplicate check makes this quadratic in the number of
	 * relocations, but tables are usually short and this is not a
	 * time critical path.
	 */
	for (idx = 0; idx < num; idx++) {
		u32 mask;

		/* Pick the instruction bits to compare for this type */
		switch (ELF32_R_TYPE(rel[idx].r_info)) {
		case R_ARM_CALL:
		case R_ARM_PC24:
		case R_ARM_JUMP24:
			mask = __opcode_to_mem_arm(0x00ffffff);
			break;
		case R_ARM_THM_CALL:
		case R_ARM_THM_JUMP24:
			mask = __opcode_to_mem_thumb32(0x07ff2fff);
			break;
		default:
			/* Not a branch relocation: never needs a PLT entry */
			continue;
		}

		if (!duplicate_rel(base, rel, idx, mask))
			needed++;
	}
	return needed;
}

int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
			      char *secstrings, struct module *mod)
{
	unsigned long core_plts = 0, init_plts = 0;
	Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum;

	/*
	 * To store the PLTs, we expand the .text section for core module code
	 * and the .init.text section for initialization code.
	 */
	for (s = sechdrs; s < sechdrs_end; ++s)
		if (strcmp(".core.plt", secstrings + s->sh_name) == 0)
			mod->arch.core_plt = s;
		else if (strcmp(".init.plt", secstrings + s->sh_name) == 0)
			mod->arch.init_plt = s;

	if (!mod->arch.core_plt || !mod->arch.init_plt) {
		pr_err("%s: sections missing\n", mod->name);
		return -ENOEXEC;
	}

	for (s = sechdrs + 1; s < sechdrs_end; ++s) {
		const Elf32_Rel *rels = (void *)ehdr + s->sh_offset;
		int numrels = s->sh_size / sizeof(Elf32_Rel);
		Elf32_Shdr *dstsec = sechdrs + s->sh_info;

		if (s->sh_type != SHT_REL)
			continue;

		if (strstr(secstrings + s->sh_name, ".init"))
			init_plts += count_plts(dstsec->sh_addr, rels, numrels);
		else
			core_plts += count_plts(dstsec->sh_addr, rels, numrels);
	}

	mod->arch.core_plt->sh_type = SHT_NOBITS;
	mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
	mod->arch.core_plt->sh_addralign = L1_CACHE_BYTES;
	mod->arch.core_plt->sh_size = round_up(core_plts * PLT_ENT_SIZE,
					       sizeof(struct plt_entries));
	mod->arch.core_plt_count = 0;

	mod->arch.init_plt->sh_type = SHT_NOBITS;
	mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
	mod->arch.init_plt->sh_addralign = L1_CACHE_BYTES;
	mod->arch.init_plt->sh_size = round_up(init_plts * PLT_ENT_SIZE,
					       sizeof(struct plt_entries));
	mod->arch.init_plt_count = 0;
	pr_debug("%s: core.plt=%x, init.plt=%x\n", __func__,
		 mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size);
	return 0;
}
Loading