Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 77cd3d0c authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 boot updates from Ingo Molnar:
 "The main changes:

   - add initial commits to randomize kernel memory section virtual
     addresses, enabled via a new kernel option: RANDOMIZE_MEMORY
     (Thomas Garnier, Kees Cook, Baoquan He, Yinghai Lu)

   - enhance KASLR (RANDOMIZE_BASE) physical memory randomization (Kees
     Cook)

   - EBDA/BIOS region boot quirk cleanups (Andy Lutomirski, Ingo Molnar)

   - misc cleanups/fixes"

* 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/boot: Simplify EBDA-vs-BIOS reservation logic
  x86/boot: Clarify what x86_legacy_features.reserve_bios_regions does
  x86/boot: Reorganize and clean up the BIOS area reservation code
  x86/mm: Do not reference phys addr beyond kernel
  x86/mm: Add memory hotplug support for KASLR memory randomization
  x86/mm: Enable KASLR for vmalloc memory regions
  x86/mm: Enable KASLR for physical mapping memory regions
  x86/mm: Implement ASLR for kernel memory regions
  x86/mm: Separate variable for trampoline PGD
  x86/mm: Add PUD VA support for physical mapping
  x86/mm: Update physical mapping variable names
  x86/mm: Refactor KASLR entropy functions
  x86/KASLR: Fix boot crash with certain memory configurations
  x86/boot/64: Add forgotten end of function marker
  x86/KASLR: Allow randomization below the load address
  x86/KASLR: Extend kernel image physical address randomization to addresses larger than 4G
  x86/KASLR: Randomize virtual address separately
  x86/KASLR: Clarify identity map interface
  x86/boot: Refuse to build with data relocations
  x86/KASLR, x86/power: Remove x86 hibernation restrictions
parents 0f657262 6a79296c
Loading
Loading
Loading
Loading
+4 −6
Original line number Diff line number Diff line
@@ -1803,12 +1803,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
	js=		[HW,JOY] Analog joystick
			See Documentation/input/joystick.txt.

	kaslr/nokaslr	[X86]
			Enable/disable kernel and module base offset ASLR
			(Address Space Layout Randomization) if built into
			the kernel. When CONFIG_HIBERNATION is selected,
			kASLR is disabled by default. When kASLR is enabled,
			hibernation will be disabled.
	nokaslr		[KNL]
			When CONFIG_RANDOMIZE_BASE is set, this disables
			kernel and module base offset ASLR (Address Space
			Layout Randomization).

	keepinitrd	[HW,ARM]

+4 −0
Original line number Diff line number Diff line
@@ -39,4 +39,8 @@ memory window (this size is arbitrary, it can be raised later if needed).
The mappings are not part of any other kernel PGD and are only available
during EFI runtime calls.

Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
physical memory, vmalloc/ioremap space and virtual memory map are randomized.
Their order is preserved but their base will be offset early at boot time.

-Andi Kleen, Jul 2004
+48 −11
Original line number Diff line number Diff line
@@ -1929,21 +1929,26 @@ config RANDOMIZE_BASE
	  attempts relying on knowledge of the location of kernel
	  code internals.

	  The kernel physical and virtual address can be randomized
	  from 16MB up to 1GB on 64-bit and 512MB on 32-bit. (Note that
	  using RANDOMIZE_BASE reduces the memory space available to
	  kernel modules from 1.5GB to 1GB.)
	  On 64-bit, the kernel physical and virtual addresses are
	  randomized separately. The physical address will be anywhere
	  between 16MB and the top of physical memory (up to 64TB). The
	  virtual address will be randomized from 16MB up to 1GB (9 bits
	  of entropy). Note that this also reduces the memory space
	  available to kernel modules from 1.5GB to 1GB.

	  On 32-bit, the kernel physical and virtual addresses are
	  randomized together. They will be randomized from 16MB up to
	  512MB (8 bits of entropy).

	  Entropy is generated using the RDRAND instruction if it is
	  supported. If RDTSC is supported, its value is mixed into
	  the entropy pool as well. If neither RDRAND nor RDTSC are
	  supported, then entropy is read from the i8254 timer.

	  Since the kernel is built using 2GB addressing, and
	  PHYSICAL_ALIGN must be at a minimum of 2MB, only 10 bits of
	  entropy is theoretically possible. Currently, with the
	  default value for PHYSICAL_ALIGN and due to page table
	  layouts, 64-bit uses 9 bits of entropy and 32-bit uses 8 bits.
	  supported, then entropy is read from the i8254 timer. The
	  usable entropy is limited by the kernel being built using
	  2GB addressing, and by the requirement that PHYSICAL_ALIGN
	  be at least 2MB. As a result, only 10 bits of entropy are
	  theoretically possible, but the implementations are further
	  limited due to memory layouts.

	  If CONFIG_HIBERNATE is also enabled, KASLR is disabled at boot
	  time. To enable it, boot with "kaslr" on the kernel command
@@ -1983,6 +1988,38 @@ config PHYSICAL_ALIGN

	  Don't change this unless you know what you are doing.

config RANDOMIZE_MEMORY
	bool "Randomize the kernel memory sections"
	depends on X86_64
	depends on RANDOMIZE_BASE
	default RANDOMIZE_BASE
	---help---
	   Randomizes the base virtual address of kernel memory sections
	   (physical memory mapping, vmalloc & vmemmap). This security feature
	   makes exploits relying on predictable memory locations less reliable.

	   The order of allocations remains unchanged. Entropy is generated in
	   the same way as RANDOMIZE_BASE. The current implementation, in the
	   optimal configuration, has on average 30,000 different possible
	   virtual addresses for each memory section.

	   If unsure, say N.

config RANDOMIZE_MEMORY_PHYSICAL_PADDING
	hex "Physical memory mapping padding" if EXPERT
	depends on RANDOMIZE_MEMORY
	default "0xa" if MEMORY_HOTPLUG
	default "0x0"
	range 0x1 0x40 if MEMORY_HOTPLUG
	range 0x0 0x40
	---help---
	   Define the padding in terabytes added to the existing physical
	   memory size during kernel memory randomization. It is useful
	   for memory hotplug support but reduces the entropy available for
	   address randomization.

	   If unsure, leave at the default value.

config HOTPLUG_CPU
	bool "Support for hot-pluggable CPUs"
	depends on SMP
+18 −0
Original line number Diff line number Diff line
@@ -85,7 +85,25 @@ vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \
	$(objtree)/drivers/firmware/efi/libstub/lib.a
vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o

# The compressed kernel is built with -fPIC/-fPIE so that a boot loader
# can place it anywhere in memory and it will still run. However, since
# it is executed as-is without any ELF relocation processing performed
# (and has already had all relocation sections stripped from the binary),
# none of the code can use data relocations (e.g. static assignments of
# pointer values), since they will be meaningless at runtime. This check
# will refuse to link the vmlinux if any of these relocations are found.
# Run "readelf -S" on every .o prerequisite and stop with an error as soon
# as one of them lists a .rel.local section. The trailing "|| true" keeps a
# non-matching grep from being treated as a recipe failure.
quiet_cmd_check_data_rel = DATAREL $@
define cmd_check_data_rel
	for obj in $(filter %.o,$^); do \
		readelf -S $$obj | grep -qF .rel.local && { \
			echo "error: $$obj has data relocations!" >&2; \
			exit 1; \
		} || true; \
	done
endef

$(obj)/vmlinux: $(vmlinux-objs-y) FORCE
	$(call if_changed,check_data_rel)
	$(call if_changed,ld)

OBJCOPYFLAGS_vmlinux.bin :=  -R .comment -S
+108 −143
Original line number Diff line number Diff line
@@ -12,10 +12,6 @@
#include "misc.h"
#include "error.h"

#include <asm/msr.h>
#include <asm/archrandom.h>
#include <asm/e820.h>

#include <generated/compile.h>
#include <linux/module.h>
#include <linux/uts.h>
@@ -26,26 +22,6 @@
static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
		LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;

#define I8254_PORT_CONTROL	0x43
#define I8254_PORT_COUNTER0	0x40
#define I8254_CMD_READBACK	0xC0
#define I8254_SELECT_COUNTER0	0x02
#define I8254_STATUS_NOTREADY	0x40
/*
 * Sample the 16-bit count of i8254 counter 0.
 *
 * A read-back command for counter 0 is written to the control port, then
 * the status byte and the two count bytes (low, then high) are read from
 * the counter 0 data port. The sequence is repeated for as long as the
 * status byte has I8254_STATUS_NOTREADY set.
 *
 * Returns the assembled 16-bit counter value.
 */
static inline u16 i8254(void)
{
	u16 status, timer;

	do {
		/*
		 * outb() takes the value first and the port second: the
		 * command byte must go to I8254_PORT_CONTROL, not the
		 * other way around.
		 */
		outb(I8254_CMD_READBACK | I8254_SELECT_COUNTER0,
		     I8254_PORT_CONTROL);
		status = inb(I8254_PORT_COUNTER0);
		timer  = inb(I8254_PORT_COUNTER0);
		timer |= inb(I8254_PORT_COUNTER0) << 8;
	} while (status & I8254_STATUS_NOTREADY);

	return timer;
}

static unsigned long rotate_xor(unsigned long hash, const void *area,
				size_t size)
{
@@ -62,7 +38,7 @@ static unsigned long rotate_xor(unsigned long hash, const void *area,
}

/* Attempt to create a simple but unpredictable starting entropy. */
static unsigned long get_random_boot(void)
static unsigned long get_boot_seed(void)
{
	unsigned long hash = 0;

@@ -72,50 +48,8 @@ static unsigned long get_random_boot(void)
	return hash;
}

/*
 * Produce a pseudo-random unsigned long for KASLR.
 *
 * @purpose: label printed (via debug_putstr) in front of the list of
 *           entropy sources that were used.
 *
 * The value starts from get_random_boot() and has further sources XORed
 * in: RDRAND when the CPU flag is set and rdrand_long() returns non-zero,
 * and the TSC when the CPU flag is set. The i8254 timer is used only when
 * neither of those contributed. The result is then diffused with a
 * multiply by a fixed constant.
 */
static unsigned long get_random_long(const char *purpose)
{
	/* Word-size specific multiplier used for the final mixing step. */
#ifdef CONFIG_X86_64
	const unsigned long mix_const = 0x5d6008cbf3848dd3UL;
#else
	const unsigned long mix_const = 0x3f39e593UL;
#endif
	unsigned long raw, random = get_random_boot();
	/* Stays true until some other source has been mixed in. */
	bool use_i8254 = true;

	debug_putstr(purpose);
	debug_putstr(" KASLR using");

	if (has_cpuflag(X86_FEATURE_RDRAND)) {
		debug_putstr(" RDRAND");
		/* Only count RDRAND as a source if it actually returned data. */
		if (rdrand_long(&raw)) {
			random ^= raw;
			use_i8254 = false;
		}
	}

	if (has_cpuflag(X86_FEATURE_TSC)) {
		debug_putstr(" RDTSC");
		raw = rdtsc();

		random ^= raw;
		use_i8254 = false;
	}

	/* Fallback: neither RDRAND nor RDTSC contributed anything. */
	if (use_i8254) {
		debug_putstr(" i8254");
		random ^= i8254();
	}

	/* Circular multiply for better bit diffusion */
	/*
	 * "mul" leaves the low half of the product in the "a" register
	 * (random) and the high half in the "d" register (raw); adding the
	 * two folds the high bits back into the result.
	 */
	asm("mul %3"
	    : "=a" (random), "=d" (raw)
	    : "a" (random), "rm" (mix_const));
	random += raw;

	debug_putstr("...\n");

	return random;
}
#define KASLR_COMPRESSED_BOOT
#include "../../lib/kaslr.c"

struct mem_vector {
	unsigned long start;
@@ -132,17 +66,6 @@ enum mem_avoid_index {

static struct mem_vector mem_avoid[MEM_AVOID_MAX];

/*
 * Tell whether @item lies completely inside @region: it must not begin
 * before the region's start and must not end after the region's end.
 */
static bool mem_contains(struct mem_vector *region, struct mem_vector *item)
{
	return item->start >= region->start &&
	       item->start + item->size <= region->start + region->size;
}

static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
{
	/* Item one is entirely before item two. */
@@ -296,6 +219,7 @@ static bool mem_avoid_overlap(struct mem_vector *img,
		if (mem_overlaps(img, &mem_avoid[i]) &&
		    mem_avoid[i].start < earliest) {
			*overlap = mem_avoid[i];
			earliest = overlap->start;
			is_overlapping = true;
		}
	}
@@ -310,6 +234,7 @@ static bool mem_avoid_overlap(struct mem_vector *img,

		if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
			*overlap = avoid;
			earliest = overlap->start;
			is_overlapping = true;
		}

@@ -319,8 +244,6 @@ static bool mem_avoid_overlap(struct mem_vector *img,
	return is_overlapping;
}

static unsigned long slots[KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN];

struct slot_area {
	unsigned long addr;
	int num;
@@ -351,36 +274,44 @@ static void store_slot_info(struct mem_vector *region, unsigned long image_size)
	}
}

/*
 * Record @addr as the next candidate slot. The list should never be able
 * to overflow; if it is already full the address is silently dropped.
 */
static void slots_append(unsigned long addr)
{
	if (slot_max < KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN)
		slots[slot_max++] = addr;
}

static unsigned long slots_fetch_random(void)
{
	unsigned long slot;
	int i;

	/* Handle case of no slots stored. */
	if (slot_max == 0)
		return 0;

	return slots[get_random_long("Physical") % slot_max];
	slot = kaslr_get_random_long("Physical") % slot_max;

	for (i = 0; i < slot_area_index; i++) {
		if (slot >= slot_areas[i].num) {
			slot -= slot_areas[i].num;
			continue;
		}
		return slot_areas[i].addr + slot * CONFIG_PHYSICAL_ALIGN;
	}

	if (i == slot_area_index)
		debug_putstr("slots_fetch_random() failed!?\n");
	return 0;
}

static void process_e820_entry(struct e820entry *entry,
			       unsigned long minimum,
			       unsigned long image_size)
{
	struct mem_vector region, img, overlap;
	struct mem_vector region, overlap;
	struct slot_area slot_area;
	unsigned long start_orig;

	/* Skip non-RAM entries. */
	if (entry->type != E820_RAM)
		return;

	/* Ignore entries entirely above our maximum. */
	if (entry->addr >= KERNEL_IMAGE_SIZE)
	/* On 32-bit, ignore entries entirely above our maximum. */
	if (IS_ENABLED(CONFIG_X86_32) && entry->addr >= KERNEL_IMAGE_SIZE)
		return;

	/* Ignore entries entirely below our minimum. */
@@ -390,31 +321,55 @@ static void process_e820_entry(struct e820entry *entry,
	region.start = entry->addr;
	region.size = entry->size;

	/* Give up if slot area array is full. */
	while (slot_area_index < MAX_SLOT_AREA) {
		start_orig = region.start;

		/* Potentially raise address to minimum location. */
		if (region.start < minimum)
			region.start = minimum;

	/* Potentially raise address to meet alignment requirements. */
		/* Potentially raise address to meet alignment needs. */
		region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);

	/* Did we raise the address above the bounds of this e820 region? */
		/* Did we raise the address above this e820 region? */
		if (region.start > entry->addr + entry->size)
			return;

		/* Reduce size by any delta from the original address. */
	region.size -= region.start - entry->addr;
		region.size -= region.start - start_orig;

	/* Reduce maximum size to fit end of image within maximum limit. */
	if (region.start + region.size > KERNEL_IMAGE_SIZE)
		/* On 32-bit, reduce region size to fit within max size. */
		if (IS_ENABLED(CONFIG_X86_32) &&
		    region.start + region.size > KERNEL_IMAGE_SIZE)
			region.size = KERNEL_IMAGE_SIZE - region.start;

	/* Walk each aligned slot and check for avoided areas. */
	for (img.start = region.start, img.size = image_size ;
	     mem_contains(&region, &img) ;
	     img.start += CONFIG_PHYSICAL_ALIGN) {
		if (mem_avoid_overlap(&img, &overlap))
			continue;
		slots_append(img.start);
		/* Return if region can't contain decompressed kernel */
		if (region.size < image_size)
			return;

		/* If nothing overlaps, store the region and return. */
		if (!mem_avoid_overlap(&region, &overlap)) {
			store_slot_info(&region, image_size);
			return;
		}

		/* Store beginning of region if holds at least image_size. */
		if (overlap.start > region.start + image_size) {
			struct mem_vector beginning;

			beginning.start = region.start;
			beginning.size = overlap.start - region.start;
			store_slot_info(&beginning, image_size);
		}

		/* Return if overlap extends to or past end of region. */
		if (overlap.start + overlap.size >= region.start + region.size)
			return;

		/* Clip off the overlapping region and start over. */
		region.size -= overlap.start - region.start + overlap.size;
		region.start = overlap.start + overlap.size;
	}
}

@@ -431,6 +386,10 @@ static unsigned long find_random_phys_addr(unsigned long minimum,
	for (i = 0; i < boot_params->e820_entries; i++) {
		process_e820_entry(&boot_params->e820_map[i], minimum,
				   image_size);
		if (slot_area_index == MAX_SLOT_AREA) {
			debug_putstr("Aborted e820 scan (slot_areas full)!\n");
			break;
		}
	}

	return slots_fetch_random();
@@ -454,7 +413,7 @@ static unsigned long find_random_virt_addr(unsigned long minimum,
	slots = (KERNEL_IMAGE_SIZE - minimum - image_size) /
		 CONFIG_PHYSICAL_ALIGN + 1;

	random_addr = get_random_long("Virtual") % slots;
	random_addr = kaslr_get_random_long("Virtual") % slots;

	return random_addr * CONFIG_PHYSICAL_ALIGN + minimum;
}
@@ -463,48 +422,54 @@ static unsigned long find_random_virt_addr(unsigned long minimum,
 * Since this function examines addresses much more numerically,
 * it takes the input and output pointers as 'unsigned long'.
 */
unsigned char *choose_random_location(unsigned long input,
void choose_random_location(unsigned long input,
			    unsigned long input_size,
				      unsigned long output,
				      unsigned long output_size)
			    unsigned long *output,
			    unsigned long output_size,
			    unsigned long *virt_addr)
{
	unsigned long choice = output;
	unsigned long random_addr;
	unsigned long random_addr, min_addr;

	/* By default, keep output position unchanged. */
	*virt_addr = *output;

#ifdef CONFIG_HIBERNATION
	if (!cmdline_find_option_bool("kaslr")) {
		warn("KASLR disabled: 'kaslr' not on cmdline (hibernation selected).");
		goto out;
	}
#else
	if (cmdline_find_option_bool("nokaslr")) {
		warn("KASLR disabled: 'nokaslr' on cmdline.");
		goto out;
		return;
	}
#endif

	boot_params->hdr.loadflags |= KASLR_FLAG;

	/* Prepare to add new identity pagetables on demand. */
	initialize_identity_maps();

	/* Record the various known unsafe memory ranges. */
	mem_avoid_init(input, input_size, output);
	mem_avoid_init(input, input_size, *output);

	/*
	 * Low end of the randomization range should be the
	 * smaller of 512M or the initial kernel image
	 * location:
	 */
	min_addr = min(*output, 512UL << 20);

	/* Walk e820 and find a random address. */
	random_addr = find_random_phys_addr(output, output_size);
	random_addr = find_random_phys_addr(min_addr, output_size);
	if (!random_addr) {
		warn("KASLR disabled: could not find suitable E820 region!");
		goto out;
	} else {
		/* Update the new physical address location. */
		if (*output != random_addr) {
			add_identity_map(random_addr, output_size);
			*output = random_addr;
		}
	}

	/* Always enforce the minimum. */
	if (random_addr < choice)
		goto out;

	choice = random_addr;

	add_identity_map(choice, output_size);

	/* This actually loads the identity pagetable on x86_64. */
	finalize_identity_maps();
out:
	return (unsigned char *)choice;

	/* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */
	if (IS_ENABLED(CONFIG_X86_64))
		random_addr = find_random_virt_addr(LOAD_PHYSICAL_ADDR, output_size);
	*virt_addr = random_addr;
}
Loading