Commit 661e4e33 authored by David S. Miller

Daniel Borkmann says:

====================
pull-request: bpf 2018-01-09

The following pull-request contains BPF updates for your *net* tree.

The main changes are:

1) Prevent out-of-bounds speculation in BPF maps by masking the
   index after bounds checks in order to fix spectre v1, and
   add an option BPF_JIT_ALWAYS_ON into Kconfig that allows for
   removing the BPF interpreter from the kernel in favor of
   JIT-only mode to make spectre v2 harder, from Alexei.

2) Remove false sharing of map refcount with max_entries which
   was used in spectre v1, from Daniel.

3) Add a missing NULL psock check in sockmap in order to fix
   a race, from John.

4) Fix test_align BPF selftest case since a recent change in
   verifier rejects the bit-wise arithmetic on pointers
   earlier but test_align update was missing, from Alexei.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 4512c43e 290af866
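
For context on item 1 of the pull request: the arraymap change below sizes unprivileged arrays up to a power of two and ANDs every index with index_mask after the bounds check, so a load issued under a mispredicted branch still cannot reach outside the allocation. The following is a minimal, illustrative userspace sketch of that idea only; the names toy_array and toy_lookup are made up here and are not part of the kernel change:

#include <stdint.h>
#include <stddef.h>

struct toy_array {
	uint32_t nr_entries;  /* user-visible number of elements */
	uint32_t index_mask;  /* (power of two >= nr_entries) - 1 */
	uint64_t *values;     /* backing store holds index_mask + 1 slots */
};

static uint64_t *toy_lookup(struct toy_array *a, uint32_t index)
{
	if (index >= a->nr_entries)
		return NULL;
	/* Architecturally a no-op for valid indices; under speculation
	 * it clamps the access to the power-of-two sized allocation.
	 */
	return &a->values[index & a->index_mask];
}
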
include/linux/bpf.h  +18 −8
@@ -43,7 +43,14 @@ struct bpf_map_ops {
};

struct bpf_map {
	atomic_t refcnt;
	/* 1st cacheline with read-mostly members of which some
	 * are also accessed in fast-path (e.g. ops, max_entries).
	 */
	const struct bpf_map_ops *ops ____cacheline_aligned;
	struct bpf_map *inner_map_meta;
#ifdef CONFIG_SECURITY
	void *security;
#endif
	enum bpf_map_type map_type;
	u32 key_size;
	u32 value_size;
@@ -52,15 +59,17 @@ struct bpf_map {
	u32 pages;
	u32 id;
	int numa_node;
	struct user_struct *user;
	const struct bpf_map_ops *ops;
	struct work_struct work;
	bool unpriv_array;
	/* 7 bytes hole */

	/* 2nd cacheline with misc members to avoid false sharing
	 * particularly with refcounting.
	 */
	struct user_struct *user ____cacheline_aligned;
	atomic_t refcnt;
	atomic_t usercnt;
	struct bpf_map *inner_map_meta;
	struct work_struct work;
	char name[BPF_OBJ_NAME_LEN];
#ifdef CONFIG_SECURITY
	void *security;
#endif
};

/* function argument constraints */
@@ -221,6 +230,7 @@ struct bpf_prog_aux {
struct bpf_array {
	struct bpf_map map;
	u32 elem_size;
	u32 index_mask;
	/* 'ownership' of prog_array is claimed by the first program that
	 * is going to use this map or by the first program which FD is stored
	 * in the map to make sure that all callers and callees have the same
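
The struct bpf_map reshuffle above is item 2 of the pull request: refcnt and usercnt are written on every map get/put, so they move to a second cache line and no longer share a line with read-mostly fields such as max_entries, which every lookup touches and which was used in the spectre v1 gadget. As a reading aid, a hypothetical minimal layout showing the same pattern; toy_map and its fields are made-up names, not the kernel structure:

#include <linux/types.h>
#include <linux/cache.h>
#include <linux/atomic.h>

struct toy_map {
	/* 1st cacheline: read-mostly fields hit in the lookup fast path */
	u32 max_entries ____cacheline_aligned;
	u32 value_size;

	/* 2nd cacheline: write-heavy reference counting */
	atomic_t refcnt ____cacheline_aligned;
	atomic_t usercnt;
};
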
init/Kconfig  +7 −0
@@ -1396,6 +1396,13 @@ config BPF_SYSCALL
	  Enable the bpf() system call that allows to manipulate eBPF
	  programs and maps via file descriptors.

config BPF_JIT_ALWAYS_ON
	bool "Permanently enable BPF JIT and remove BPF interpreter"
	depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
	help
	  Enables BPF JIT and removes BPF interpreter to avoid
	  speculative execution of BPF instructions by the interpreter

config USERFAULTFD
	bool "Enable userfaultfd() system call"
	select ANON_INODES
kernel/bpf/arraymap.c  +36 −11
@@ -53,9 +53,10 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int numa_node = bpf_map_attr_numa_node(attr);
	u32 elem_size, index_mask, max_entries;
	bool unpriv = !capable(CAP_SYS_ADMIN);
	struct bpf_array *array;
	u64 array_size;
	u32 elem_size;

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
@@ -72,11 +73,20 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)

	elem_size = round_up(attr->value_size, 8);

	max_entries = attr->max_entries;
	index_mask = roundup_pow_of_two(max_entries) - 1;

	if (unpriv)
		/* round up array size to nearest power of 2,
		 * since cpu will speculate within index_mask limits
		 */
		max_entries = index_mask + 1;

	array_size = sizeof(*array);
	if (percpu)
		array_size += (u64) attr->max_entries * sizeof(void *);
		array_size += (u64) max_entries * sizeof(void *);
	else
		array_size += (u64) attr->max_entries * elem_size;
		array_size += (u64) max_entries * elem_size;

	/* make sure there is no u32 overflow later in round_up() */
	if (array_size >= U32_MAX - PAGE_SIZE)
@@ -86,6 +96,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
	array = bpf_map_area_alloc(array_size, numa_node);
	if (!array)
		return ERR_PTR(-ENOMEM);
	array->index_mask = index_mask;
	array->map.unpriv_array = unpriv;

	/* copy mandatory map attributes */
	array->map.map_type = attr->map_type;
@@ -121,12 +133,13 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return array->value + array->elem_size * index;
	return array->value + array->elem_size * (index & array->index_mask);
}

/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_insn *insn = insn_buf;
	u32 elem_size = round_up(map->value_size, 8);
	const int ret = BPF_REG_0;
@@ -135,7 +148,12 @@ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (map->unpriv_array) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
	}

	if (is_power_of_2(elem_size)) {
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
@@ -157,7 +175,7 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return this_cpu_ptr(array->pptrs[index]);
	return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}

int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
@@ -177,7 +195,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index];
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
		off += size;
@@ -225,10 +243,11 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
		return -EEXIST;

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		memcpy(this_cpu_ptr(array->pptrs[index]),
		memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
		       value, map->value_size);
	else
		memcpy(array->value + array->elem_size * index,
		memcpy(array->value +
		       array->elem_size * (index & array->index_mask),
		       value, map->value_size);
	return 0;
}
@@ -262,7 +281,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	pptr = array->pptrs[index];
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
		off += size;
@@ -613,6 +632,7 @@ static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
static u32 array_of_map_gen_lookup(struct bpf_map *map,
				   struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 elem_size = round_up(map->value_size, 8);
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;
@@ -621,7 +641,12 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map,

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (map->unpriv_array) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
	}
	if (is_power_of_2(elem_size))
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	else
kernel/bpf/core.c  +19 −0
@@ -767,6 +767,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
}
EXPORT_SYMBOL_GPL(__bpf_call_base);

#ifndef CONFIG_BPF_JIT_ALWAYS_ON
/**
 *	__bpf_prog_run - run eBPF program on a given context
 *	@ctx: is the data we are operating on
@@ -1317,6 +1318,14 @@ EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
};

#else
static unsigned int __bpf_prog_ret0(const void *ctx,
				    const struct bpf_insn *insn)
{
	return 0;
}
#endif

bool bpf_prog_array_compatible(struct bpf_array *array,
			       const struct bpf_prog *fp)
{
@@ -1364,9 +1373,13 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
 */
struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
{
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
	u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);

	fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
#else
	fp->bpf_func = __bpf_prog_ret0;
#endif

	/* eBPF JITs can rewrite the program in case constant
	 * blinding is active. However, in case of error during
@@ -1376,6 +1389,12 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
	 */
	if (!bpf_prog_is_dev_bound(fp->aux)) {
		fp = bpf_int_jit_compile(fp);
#ifdef CONFIG_BPF_JIT_ALWAYS_ON
		if (!fp->jited) {
			*err = -ENOTSUPP;
			return fp;
		}
#endif
	} else {
		*err = bpf_prog_offload_compile(fp);
		if (*err)
kernel/bpf/sockmap.c  +9 −2
@@ -591,8 +591,15 @@ static void sock_map_free(struct bpf_map *map)

		write_lock_bh(&sock->sk_callback_lock);
		psock = smap_psock_sk(sock);
		/* This check handles a racing sock event that can get the
		 * sk_callback_lock before this case but after xchg happens
		 * causing the refcnt to hit zero and sock user data (psock)
		 * to be null and queued for garbage collection.
		 */
		if (likely(psock)) {
			smap_list_remove(psock, &stab->sock_map[i]);
			smap_release_sock(psock, sock);
		}
		write_unlock_bh(&sock->sk_callback_lock);
	}
	rcu_read_unlock();