
Commit bdb85cd1 authored by Ard Biesheuvel, committed by Will Deacon

arm64/module: switch to ADRP/ADD sequences for PLT entries



Now that we have switched to the small code model entirely, and
reduced the extended KASLR range to 4 GB, we can be sure that the
targets of relative branches that are out of range are in range
for an ADRP/ADD pair, which is one instruction shorter than our
current MOVN/MOVK/MOVK sequence, and is more idiomatic and therefore
more likely to be implemented efficiently by micro-architectures.

So switch over the ordinary PLT code and the special handling of
the Cortex-A53 ADRP erratum, as well as the ftrace trampoline
handling.

Reviewed-by: Torsten Duwe <duwe@lst.de>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
[will: Added a couple of comments in the plt equality check]
Signed-off-by: Will Deacon <will.deacon@arm.com>
parent 7aaf7b2f
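
Editorial note: for context, here are the two veneer shapes this commit trades between, together with a quick check of the range argument made in the message. This is an illustrative userspace C sketch, not code from the patch; adrp_reaches() and its constants are my own restatement of ADRP's documented +/- 4 GB reach.

#include <stdint.h>
#include <stdio.h>

/*
 * Old veneer (4 instructions, absolute address):
 *	movn	x16, #~target[15:0]
 *	movk	x16, #target[31:16], lsl #16
 *	movk	x16, #target[47:32], lsl #32
 *	br	x16
 *
 * New veneer (3 instructions, PC-relative):
 *	adrp	x16, target
 *	add	x16, x16, #:lo12:target
 *	br	x16
 */

/* Does the page displacement from pc to dst fit in ADRP's +/- 4 GB range? */
static int adrp_reaches(uint64_t pc, uint64_t dst)
{
	int64_t disp = (int64_t)(dst & ~0xfffULL) - (int64_t)(pc & ~0xfffULL);

	return disp >= -(1LL << 32) && disp < (1LL << 32);
}

int main(void)
{
	/* With modules and the kernel confined to a 4 GB window, any
	 * out-of-range branch target is reachable from its veneer. */
	printf("%d\n", adrp_reaches(0x1000, 0xfffff000));	/* prints 1 */
	return 0;
}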
arch/arm64/include/asm/module.h  +8 −28
@@ -58,39 +58,19 @@ struct plt_entry {
 	 * is exactly what we are dealing with here, we are free to use x16
 	 * as a scratch register in the PLT veneers.
 	 */
-	__le32	mov0;	/* movn	x16, #0x....			*/
-	__le32	mov1;	/* movk	x16, #0x...., lsl #16		*/
-	__le32	mov2;	/* movk	x16, #0x...., lsl #32		*/
+	__le32	adrp;	/* adrp	x16, ....			*/
+	__le32	add;	/* add	x16, x16, #0x....		*/
 	__le32	br;	/* br	x16				*/
 };
 
-static inline struct plt_entry get_plt_entry(u64 val)
+static inline bool is_forbidden_offset_for_adrp(void *place)
 {
-	/*
-	 * MOVK/MOVN/MOVZ opcode:
-	 * +--------+------------+--------+-----------+-------------+---------+
-	 * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
-	 * +--------+------------+--------+-----------+-------------+---------+
-	 *
-	 * Rd     := 0x10 (x16)
-	 * hw     := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
-	 * opc    := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
-	 * sf     := 1 (64-bit variant)
-	 */
-	return (struct plt_entry){
-		cpu_to_le32(0x92800010 | (((~val      ) & 0xffff)) << 5),
-		cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
-		cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
-		cpu_to_le32(0xd61f0200)
-	};
+	return IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) &&
+	       cpus_have_const_cap(ARM64_WORKAROUND_843419) &&
+	       ((u64)place & 0xfff) >= 0xff8;
 }
 
-static inline bool plt_entries_equal(const struct plt_entry *a,
-				     const struct plt_entry *b)
-{
-	return a->mov0 == b->mov0 &&
-	       a->mov1 == b->mov1 &&
-	       a->mov2 == b->mov2;
-}
+struct plt_entry get_plt_entry(u64 dst, void *pc);
+bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b);
 
 #endif /* __ASM_MODULE_H */
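
Editorial note: the new is_forbidden_offset_for_adrp() helper centralises the Cortex-A53 erratum 843419 test: an ADRP located in one of the last two instruction slots of a 4 KB page (page offsets 0xff8 and 0xffc) can misbehave on affected cores. A standalone sketch of just the address test follows; the real helper additionally checks the config option and the CPU capability, and forbidden_adrp_offset() is a name of my own.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* True for the last two 4-byte instruction slots of a 4 KB page
 * (page offsets 0xff8 and 0xffc), where the erratum can bite. */
static bool forbidden_adrp_offset(uintptr_t place)
{
	return (place & 0xfff) >= 0xff8;
}

int main(void)
{
	printf("%d %d %d\n", forbidden_adrp_offset(0xff4),	/* 0 */
	       forbidden_adrp_offset(0xff8),			/* 1 */
	       forbidden_adrp_offset(0xffc));			/* 1 */
	return 0;
}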
arch/arm64/kernel/ftrace.c  +1 −1
@@ -104,7 +104,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 		 * is added in the future, but for now, the pr_err() below
 		 * deals with a theoretical issue only.
 		 */
-		trampoline = get_plt_entry(addr);
+		trampoline = get_plt_entry(addr, mod->arch.ftrace_trampoline);
 		if (!plt_entries_equal(mod->arch.ftrace_trampoline,
 				       &trampoline)) {
 			if (!plt_entries_equal(mod->arch.ftrace_trampoline,
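
Editorial note: the knock-on change here is that get_plt_entry() gained a pc argument, because an ADRP-based entry is only valid at the address it is generated for; the ftrace path therefore passes the trampoline slot itself. A userspace sketch of the underlying invariant, where pages_between() is a hypothetical stand-in for the ADRP immediate:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the ADRP immediate: the signed distance,
 * in 4 KB pages, from the veneer (pc) to the target (dst). */
static int64_t pages_between(uint64_t pc, uint64_t dst)
{
	return (int64_t)(dst >> 12) - (int64_t)(pc >> 12);
}

int main(void)
{
	uint64_t dst = 0x500000;

	/* The same destination needs a different immediate from each
	 * placement, so the entry must be built against its final slot. */
	printf("%lld %lld\n",
	       (long long)pages_between(0x100000, dst),	/* 1024 */
	       (long long)pages_between(0x200000, dst));	/* 768 */
	return 0;
}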
arch/arm64/kernel/module-plts.c  +78 −21
@@ -11,6 +11,61 @@
 #include <linux/module.h>
 #include <linux/sort.h>
 
+static struct plt_entry __get_adrp_add_pair(u64 dst, u64 pc,
+					    enum aarch64_insn_register reg)
+{
+	u32 adrp, add;
+
+	adrp = aarch64_insn_gen_adr(pc, dst, reg, AARCH64_INSN_ADR_TYPE_ADRP);
+	add = aarch64_insn_gen_add_sub_imm(reg, reg, dst % SZ_4K,
+					   AARCH64_INSN_VARIANT_64BIT,
+					   AARCH64_INSN_ADSB_ADD);
+
+	return (struct plt_entry){ cpu_to_le32(adrp), cpu_to_le32(add) };
+}
+
+struct plt_entry get_plt_entry(u64 dst, void *pc)
+{
+	struct plt_entry plt;
+	static u32 br;
+
+	if (!br)
+		br = aarch64_insn_gen_branch_reg(AARCH64_INSN_REG_16,
+						 AARCH64_INSN_BRANCH_NOLINK);
+
+	plt = __get_adrp_add_pair(dst, (u64)pc, AARCH64_INSN_REG_16);
+	plt.br = cpu_to_le32(br);
+
+	return plt;
+}
+
+bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b)
+{
+	u64 p, q;
+
+	/*
+	 * Check whether both entries refer to the same target:
+	 * do the cheapest checks first.
+	 * If the 'add' or 'br' opcodes are different, then the target
+	 * cannot be the same.
+	 */
+	if (a->add != b->add || a->br != b->br)
+		return false;
+
+	p = ALIGN_DOWN((u64)a, SZ_4K);
+	q = ALIGN_DOWN((u64)b, SZ_4K);
+
+	/*
+	 * If the 'adrp' opcodes are the same then we just need to check
+	 * that they refer to the same 4k region.
+	 */
+	if (a->adrp == b->adrp && p == q)
+		return true;
+
+	return (p + aarch64_insn_adrp_get_offset(le32_to_cpu(a->adrp))) ==
+	       (q + aarch64_insn_adrp_get_offset(le32_to_cpu(b->adrp)));
+}
+
 static bool in_init(const struct module *mod, void *loc)
 {
 	return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size;
@@ -24,19 +79,23 @@ u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs,
 	struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core :
 							  &mod->arch.init;
 	struct plt_entry *plt = (struct plt_entry *)sechdrs[pltsec->plt_shndx].sh_addr;
 	int i = pltsec->plt_num_entries;
+	int j = i - 1;
 	u64 val = sym->st_value + rela->r_addend;
 
-	plt[i] = get_plt_entry(val);
+	if (is_forbidden_offset_for_adrp(&plt[i].adrp))
+		i++;
+
+	plt[i] = get_plt_entry(val, &plt[i]);
 
 	/*
 	 * Check if the entry we just created is a duplicate. Given that the
 	 * relocations are sorted, this will be the last entry we allocated.
 	 * (if one exists).
 	 */
-	if (i > 0 && plt_entries_equal(plt + i, plt + i - 1))
-		return (u64)&plt[i - 1];
+	if (j >= 0 && plt_entries_equal(plt + i, plt + j))
+		return (u64)&plt[j];
 
-	pltsec->plt_num_entries++;
+	pltsec->plt_num_entries += i - j;
 	if (WARN_ON(pltsec->plt_num_entries > pltsec->plt_max_entries))
 		return 0;
@@ -51,35 +110,24 @@ u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs,
 	struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core :
 							  &mod->arch.init;
 	struct plt_entry *plt = (struct plt_entry *)sechdrs[pltsec->plt_shndx].sh_addr;
 	int i = pltsec->plt_num_entries++;
-	u32 mov0, mov1, mov2, br;
+	u32 br;
 	int rd;
 
 	if (WARN_ON(pltsec->plt_num_entries > pltsec->plt_max_entries))
 		return 0;
 
+	if (is_forbidden_offset_for_adrp(&plt[i].adrp))
+		i = pltsec->plt_num_entries++;
+
 	/* get the destination register of the ADRP instruction */
 	rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD,
 					  le32_to_cpup((__le32 *)loc));
 
-	/* generate the veneer instructions */
-	mov0 = aarch64_insn_gen_movewide(rd, (u16)~val, 0,
-					 AARCH64_INSN_VARIANT_64BIT,
-					 AARCH64_INSN_MOVEWIDE_INVERSE);
-	mov1 = aarch64_insn_gen_movewide(rd, (u16)(val >> 16), 16,
-					 AARCH64_INSN_VARIANT_64BIT,
-					 AARCH64_INSN_MOVEWIDE_KEEP);
-	mov2 = aarch64_insn_gen_movewide(rd, (u16)(val >> 32), 32,
-					 AARCH64_INSN_VARIANT_64BIT,
-					 AARCH64_INSN_MOVEWIDE_KEEP);
 	br = aarch64_insn_gen_branch_imm((u64)&plt[i].br, (u64)loc + 4,
 					 AARCH64_INSN_BRANCH_NOLINK);
 
-	plt[i] = (struct plt_entry){
-			cpu_to_le32(mov0),
-			cpu_to_le32(mov1),
-			cpu_to_le32(mov2),
-			cpu_to_le32(br)
-		};
+	plt[i] = __get_adrp_add_pair(val, (u64)&plt[i], rd);
+	plt[i].br = cpu_to_le32(br);
 
 	return (u64)&plt[i];
 }
@@ -195,6 +243,15 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
 			break;
 		}
 	}
 
+	if (IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) &&
+	    cpus_have_const_cap(ARM64_WORKAROUND_843419))
+		/*
+		 * Add some slack so we can skip PLT slots that may trigger
+		 * the erratum due to the placement of the ADRP instruction.
+		 */
+		ret += DIV_ROUND_UP(ret, (SZ_4K / sizeof(struct plt_entry)));
+
 	return ret;
 }

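Editorial note: the slack added in count_plts() is easy to sanity-check. A plt_entry is now three 32-bit words (12 bytes), so SZ_4K / sizeof(struct plt_entry) is 341, and one spare slot is reserved per 341 counted entries. A hypothetical standalone check, with DIV_ROUND_UP written out and the inputs made up:

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int per_page = 4096 / 12;	/* 341 entries per 4 KB page */
	unsigned int ret = 1000;		/* PLT entries counted so far */

	/* One extra slot per page's worth of entries: 1000 -> 1003. */
	printf("%u\n", ret + DIV_ROUND_UP(ret, per_page));
	return 0;
}
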
arch/arm64/kernel/module.c  +1 −3
@@ -203,9 +203,7 @@ static int reloc_insn_adrp(struct module *mod, Elf64_Shdr *sechdrs,
 {
 	u32 insn;
 
-	if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) ||
-	    !cpus_have_const_cap(ARM64_WORKAROUND_843419) ||
-	    ((u64)place & 0xfff) < 0xff8)
+	if (!is_forbidden_offset_for_adrp(place))
 		return reloc_insn_imm(RELOC_OP_PAGE, place, val, 12, 21,
 				      AARCH64_INSN_IMM_ADR);
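
Editorial note: the reworked plt_entries_equal() deserves a worked example, since two PC-relative entries at different addresses can still be equal. The check Will annotated compares where each ADRP lands rather than the raw opcodes: if entry A sits in page P with an ADRP displacement of N bytes and entry B sits in page Q with displacement M, they reach the same target page exactly when P + N == Q + M, and identical 'add' and 'br' words then guarantee the same final address. A hypothetical numeric check, with made-up page addresses:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical veneer pages and the byte displacements their
	 * ADRP instructions encode (always multiples of 4 KB). */
	uint64_t p = 0x10000, off_a = 0x3000;	/* entry A */
	uint64_t q = 0x11000, off_b = 0x2000;	/* entry B */

	/* Different ADRP opcodes, but both resolve to page 0x13000, so
	 * with identical 'add' and 'br' words the entries are equal. */
	printf("%d\n", (p + off_a) == (q + off_b));	/* prints 1 */
	return 0;
}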