
Commit 9bdf64d5 authored by David S. Miller

Merge branch 'bpf-map-in-map'



Martin KaFai Lau says:

====================
bpf: Add map-in-map support

This patchset adds map-in-map support (map->map).
One use case is the (vips -> webservers) mapping in an L4 load balancer, so
that different vips can be backed by different sets of webservers.

Please refer to the individual commit log for details.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents b4f0a661 fb30d4b7
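
For context, the pattern this patchset enables on the BPF program side looks roughly like the sketch below: a lookup in the outer map returns a pointer to an inner map, and that pointer can itself be passed as the map argument of a second lookup. The sketch uses present-day libbpf declarative map syntax rather than the samples-style definitions of this era, and every map name, key type, and size is illustrative only.

/* Sketch only: modern libbpf syntax; names and sizes are made up. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct inner_map {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 64);
	__type(key, __u32);		/* webserver index */
	__type(value, __u64);		/* webserver address */
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
	__uint(max_entries, 16);
	__type(key, __u32);		/* vip */
	__array(values, struct inner_map);
} vip_to_webservers SEC(".maps");

SEC("xdp")
int pick_webserver(struct xdp_md *ctx)
{
	__u32 vip = 0, idx = 0;
	void *webservers;
	__u64 *server;

	/* first lookup returns a pointer to this vip's inner map */
	webservers = bpf_map_lookup_elem(&vip_to_webservers, &vip);
	if (!webservers)
		return XDP_PASS;

	/* the inner map pointer is itself a valid map argument */
	server = bpf_map_lookup_elem(webservers, &idx);
	if (!server)
		return XDP_PASS;

	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";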
include/linux/bpf.h +3 −0
@@ -50,6 +50,7 @@ struct bpf_map {
 	const struct bpf_map_ops *ops;
 	struct work_struct work;
 	atomic_t usercnt;
+	struct bpf_map *inner_map_meta;
 };
 
 struct bpf_map_type_list {
@@ -276,6 +277,8 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
 int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
 				 void *key, void *value, u64 map_flags);
 void bpf_fd_array_map_clear(struct bpf_map *map);
+int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
+				void *key, void *value, u64 map_flags);
 
 /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
  * forced to use 'long' read/writes to try to atomically copy long counters.
include/uapi/linux/bpf.h +3 −0
@@ -96,6 +96,8 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_LRU_HASH,
 	BPF_MAP_TYPE_LRU_PERCPU_HASH,
 	BPF_MAP_TYPE_LPM_TRIE,
+	BPF_MAP_TYPE_ARRAY_OF_MAPS,
+	BPF_MAP_TYPE_HASH_OF_MAPS,
 };
 
 enum bpf_prog_type {
@@ -152,6 +154,7 @@ union bpf_attr {
 		__u32	value_size;	/* size of value in bytes */
 		__u32	max_entries;	/* max number of entries in a map */
 		__u32	map_flags;	/* prealloc or not */
+		__u32	inner_map_fd;	/* fd pointing to the inner map */
 	};
 
 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
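
The only uapi change needed on the create path is the new inner_map_fd attribute. A minimal userspace sketch of creating an outer map through the raw bpf(2) syscall, assuming a kernel and headers that carry this patchset; the sizes and the vip-keyed layout are illustrative and error handling is elided:

/* Sketch: create inner map, then an outer hash-of-maps referencing it. */
#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int bpf_create(union bpf_attr *attr)
{
	return syscall(__NR_bpf, BPF_MAP_CREATE, attr, sizeof(*attr));
}

int create_outer_map(void)
{
	union bpf_attr attr;
	int inner_fd, outer_fd;

	/* inner map: one vip's set of webservers */
	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_HASH;
	attr.key_size = sizeof(__u32);
	attr.value_size = sizeof(__u64);
	attr.max_entries = 64;
	inner_fd = bpf_create(&attr);

	/* outer map: vip -> inner map; inner_map_fd only supplies the
	 * inner map's metadata (type, key/value sizes) at create time */
	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_HASH_OF_MAPS;
	attr.key_size = sizeof(__u32);
	attr.value_size = sizeof(__u32);	/* fd_htab_map_alloc requires u32 */
	attr.max_entries = 16;
	attr.inner_map_fd = inner_fd;
	outer_fd = bpf_create(&attr);

	return outer_fd;
}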
kernel/bpf/Makefile +1 −1
 obj-y := core.o
 
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o
-obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o
+obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
 ifeq ($(CONFIG_PERF_EVENTS),y)
 obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
 endif
kernel/bpf/arraymap.c +67 −7
@@ -17,6 +17,8 @@
 #include <linux/filter.h>
 #include <linux/perf_event.h>
+
+#include "map_in_map.h"
 
 static void bpf_array_free_percpu(struct bpf_array *array)
 {
 	int i;
@@ -117,20 +119,17 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
 /* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
 static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 {
-	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	struct bpf_insn *insn = insn_buf;
-	u32 elem_size = array->elem_size;
+	u32 elem_size = round_up(map->value_size, 8);
 	const int ret = BPF_REG_0;
 	const int map_ptr = BPF_REG_1;
 	const int index = BPF_REG_2;
 
 	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
 	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
-	*insn++ = BPF_JMP_IMM(BPF_JGE, ret, array->map.max_entries,
-			      elem_size == 1 ? 2 : 3);
-	if (elem_size == 1) {
-		/* nop */
-	} else if (is_power_of_2(elem_size)) {
+	*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
+
+	if (is_power_of_2(elem_size)) {
 		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
 	} else {
 		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
@@ -605,3 +604,64 @@ static int __init register_cgroup_array_map(void)
 }
 late_initcall(register_cgroup_array_map);
 #endif
+
+static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_map *map, *inner_map_meta;
+
+	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
+	if (IS_ERR(inner_map_meta))
+		return inner_map_meta;
+
+	map = fd_array_map_alloc(attr);
+	if (IS_ERR(map)) {
+		bpf_map_meta_free(inner_map_meta);
+		return map;
+	}
+
+	map->inner_map_meta = inner_map_meta;
+
+	return map;
+}
+
+static void array_of_map_free(struct bpf_map *map)
+{
+	/* map->inner_map_meta is only accessed by syscall which
+	 * is protected by fdget/fdput.
+	 */
+	bpf_map_meta_free(map->inner_map_meta);
+	bpf_fd_array_map_clear(map);
+	fd_array_map_free(map);
+}
+
+static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_map **inner_map = array_map_lookup_elem(map, key);
+
+	if (!inner_map)
+		return NULL;
+
+	return READ_ONCE(*inner_map);
+}
+
+static const struct bpf_map_ops array_of_map_ops = {
+	.map_alloc = array_of_map_alloc,
+	.map_free = array_of_map_free,
+	.map_get_next_key = array_map_get_next_key,
+	.map_lookup_elem = array_of_map_lookup_elem,
+	.map_delete_elem = fd_array_map_delete_elem,
+	.map_fd_get_ptr = bpf_map_fd_get_ptr,
+	.map_fd_put_ptr = bpf_map_fd_put_ptr,
+};
+
+static struct bpf_map_type_list array_of_map_type __ro_after_init = {
+	.ops = &array_of_map_ops,
+	.type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
+};
+
+static int __init register_array_of_map(void)
+{
+	bpf_register_map_type(&array_of_map_type);
+	return 0;
+}
+late_initcall(register_array_of_map);
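
The reworked array_map_gen_lookup() above derives elem_size from round_up(map->value_size, 8), since array elements are always stored 8-byte padded, and drops the elem_size == 1 special case. What the emitted instruction sequence computes is roughly the following C; struct fake_array is a simplified stand-in for struct bpf_array, and the function is a sketch for illustration only:

/* Sketch: plain-C equivalent of the inlined array lookup. */
#include <stddef.h>
#include <stdint.h>

struct fake_array {
	uint32_t max_entries;
	uint32_t value_size;
	char value[];			/* elements, each padded to 8 bytes */
};

static void *inline_array_lookup(struct fake_array *array, uint32_t *key)
{
	uint32_t elem_size = (array->value_size + 7) & ~7u; /* round_up(, 8) */
	uint32_t index = *key;			/* BPF_LDX_MEM(BPF_W, ...) */

	if (index >= array->max_entries)	/* BPF_JMP_IMM(BPF_JGE, ...) */
		return NULL;

	/* LSH when elem_size is a power of 2, MUL otherwise */
	return array->value + (uint64_t)index * elem_size;
}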
kernel/bpf/hashtab.c +121 −0
@@ -16,6 +16,7 @@
 #include <linux/rculist_nulls.h>
 #include "percpu_freelist.h"
 #include "bpf_lru_list.h"
+#include "map_in_map.h"
 
 struct bucket {
 	struct hlist_nulls_head head;
@@ -88,6 +89,11 @@ static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size
 	return *(void __percpu **)(l->key + key_size);
 }
 
+static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l)
+{
+	return *(void **)(l->key + roundup(map->key_size, 8));
+}
+
 static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
 {
 	return (struct htab_elem *) (htab->elems + i * htab->elem_size);
@@ -603,6 +609,14 @@ static void htab_elem_free_rcu(struct rcu_head *head)
 
 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 {
+	struct bpf_map *map = &htab->map;
+
+	if (map->ops->map_fd_put_ptr) {
+		void *ptr = fd_htab_map_get_ptr(map, l);
+
+		map->ops->map_fd_put_ptr(ptr);
+	}
+
 	if (l->state == HTAB_EXTRA_ELEM_USED) {
 		l->state = HTAB_EXTRA_ELEM_FREE;
 		return;
@@ -1057,6 +1071,7 @@ static void delete_all_elements(struct bpf_htab *htab)
 		}
 	}
 }
+
 /* Called when map->refcnt goes to zero, either from workqueue or from syscall */
 static void htab_map_free(struct bpf_map *map)
 {
@@ -1213,12 +1228,118 @@ static struct bpf_map_type_list htab_lru_percpu_type __ro_after_init = {
 	.type = BPF_MAP_TYPE_LRU_PERCPU_HASH,
 };
 
+static struct bpf_map *fd_htab_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_map *map;
+
+	if (attr->value_size != sizeof(u32))
+		return ERR_PTR(-EINVAL);
+
+	/* pointer is stored internally */
+	attr->value_size = sizeof(void *);
+	map = htab_map_alloc(attr);
+	attr->value_size = sizeof(u32);
+
+	return map;
+}
+
+static void fd_htab_map_free(struct bpf_map *map)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct hlist_nulls_node *n;
+	struct hlist_nulls_head *head;
+	struct htab_elem *l;
+	int i;
+
+	for (i = 0; i < htab->n_buckets; i++) {
+		head = select_bucket(htab, i);
+
+		hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
+			void *ptr = fd_htab_map_get_ptr(map, l);
+
+			map->ops->map_fd_put_ptr(ptr);
+		}
+	}
+
+	htab_map_free(map);
+}
+
+/* only called from syscall */
+int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
+				void *key, void *value, u64 map_flags)
+{
+	void *ptr;
+	int ret;
+	u32 ufd = *(u32 *)value;
+
+	ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
+	if (IS_ERR(ptr))
+		return PTR_ERR(ptr);
+
+	ret = htab_map_update_elem(map, key, &ptr, map_flags);
+	if (ret)
+		map->ops->map_fd_put_ptr(ptr);
+
+	return ret;
+}
+
+static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_map *map, *inner_map_meta;
+
+	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
+	if (IS_ERR(inner_map_meta))
+		return inner_map_meta;
+
+	map = fd_htab_map_alloc(attr);
+	if (IS_ERR(map)) {
+		bpf_map_meta_free(inner_map_meta);
+		return map;
+	}
+
+	map->inner_map_meta = inner_map_meta;
+
+	return map;
+}
+
+static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_map **inner_map  = htab_map_lookup_elem(map, key);
+
+	if (!inner_map)
+		return NULL;
+
+	return READ_ONCE(*inner_map);
+}
+
+static void htab_of_map_free(struct bpf_map *map)
+{
+	bpf_map_meta_free(map->inner_map_meta);
+	fd_htab_map_free(map);
+}
+
+static const struct bpf_map_ops htab_of_map_ops = {
+	.map_alloc = htab_of_map_alloc,
+	.map_free = htab_of_map_free,
+	.map_get_next_key = htab_map_get_next_key,
+	.map_lookup_elem = htab_of_map_lookup_elem,
+	.map_delete_elem = htab_map_delete_elem,
+	.map_fd_get_ptr = bpf_map_fd_get_ptr,
+	.map_fd_put_ptr = bpf_map_fd_put_ptr,
+};
+
+static struct bpf_map_type_list htab_of_map_type __ro_after_init = {
+	.ops = &htab_of_map_ops,
+	.type = BPF_MAP_TYPE_HASH_OF_MAPS,
+};
+
 static int __init register_htab_map(void)
 {
 	bpf_register_map_type(&htab_type);
 	bpf_register_map_type(&htab_percpu_type);
 	bpf_register_map_type(&htab_lru_type);
 	bpf_register_map_type(&htab_lru_percpu_type);
+	bpf_register_map_type(&htab_of_map_type);
 	return 0;
 }
 late_initcall(register_htab_map);
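
Populating the outer map goes through bpf_fd_htab_map_update_elem() above: userspace writes a u32 map fd as the value, and the kernel converts it into a map pointer via map_fd_get_ptr before storing it (and puts the reference again if the update fails). A minimal userspace sketch with illustrative names and no error handling:

/* Sketch: point a vip at its webserver map via BPF_MAP_UPDATE_ELEM. */
#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int set_webservers(int outer_fd, __u32 vip, int inner_fd)
{
	union bpf_attr attr;
	__u32 value = inner_fd;	/* value from userspace is the inner map's fd */

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = outer_fd;
	attr.key = (__u64)(unsigned long)&vip;
	attr.value = (__u64)(unsigned long)&value;
	attr.flags = BPF_ANY;

	return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
}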