Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit db20fd2b authored by Alexei Starovoitov's avatar Alexei Starovoitov Committed by David S. Miller
Browse files

bpf: add lookup/update/delete/iterate methods to BPF maps



'maps' is a generic storage of different types for sharing data between kernel
and userspace.

The maps are accessed from user space via BPF syscall, which has commands:

- create a map with given type and attributes
  fd = bpf(BPF_MAP_CREATE, union bpf_attr *attr, u32 size)
  returns fd or negative error

- lookup key in a given map referenced by fd
  err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
  using attr->map_fd, attr->key, attr->value
  returns zero and stores found elem into value or negative error

- create or update key/value pair in a given map
  err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
  using attr->map_fd, attr->key, attr->value
  returns zero or negative error

- find and delete element by key in a given map
  err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
  using attr->map_fd, attr->key

- iterate map elements (based on input key return next_key)
  err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size)
  using attr->map_fd, attr->key, attr->next_key

- close(fd) deletes the map

Signed-off-by: default avatarAlexei Starovoitov <ast@plumgrid.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 749730ce
Loading
Loading
Loading
Loading
+8 −0
Original line number Original line Diff line number Diff line
@@ -9,6 +9,7 @@


#include <uapi/linux/bpf.h>
#include <uapi/linux/bpf.h>
#include <linux/workqueue.h>
#include <linux/workqueue.h>
#include <linux/file.h>


struct bpf_map;
struct bpf_map;


@@ -17,6 +18,12 @@ struct bpf_map_ops {
	/* funcs callable from userspace (via syscall) */
	/* funcs callable from userspace (via syscall) */
	struct bpf_map *(*map_alloc)(union bpf_attr *attr);
	struct bpf_map *(*map_alloc)(union bpf_attr *attr);
	void (*map_free)(struct bpf_map *);
	void (*map_free)(struct bpf_map *);
	int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);

	/* funcs callable from userspace and from eBPF programs */
	void *(*map_lookup_elem)(struct bpf_map *map, void *key);
	int (*map_update_elem)(struct bpf_map *map, void *key, void *value);
	int (*map_delete_elem)(struct bpf_map *map, void *key);
};
};


struct bpf_map {
struct bpf_map {
@@ -37,5 +44,6 @@ struct bpf_map_type_list {


void bpf_register_map_type(struct bpf_map_type_list *tl);
void bpf_register_map_type(struct bpf_map_type_list *tl);
void bpf_map_put(struct bpf_map *map);
void bpf_map_put(struct bpf_map *map);
struct bpf_map *bpf_map_get(struct fd f);


#endif /* _LINUX_BPF_H */
#endif /* _LINUX_BPF_H */
+38 −0
Original line number Original line Diff line number Diff line
@@ -70,6 +70,35 @@ enum bpf_cmd {
	 * map is deleted when fd is closed
	 * map is deleted when fd is closed
	 */
	 */
	BPF_MAP_CREATE,
	BPF_MAP_CREATE,

	/* lookup key in a given map
	 * err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
	 * Using attr->map_fd, attr->key, attr->value
	 * returns zero and stores found elem into value
	 * or negative error
	 */
	BPF_MAP_LOOKUP_ELEM,

	/* create or update key/value pair in a given map
	 * err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
	 * Using attr->map_fd, attr->key, attr->value
	 * returns zero or negative error
	 */
	BPF_MAP_UPDATE_ELEM,

	/* find and delete elem by key in a given map
	 * err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
	 * Using attr->map_fd, attr->key
	 * returns zero or negative error
	 */
	BPF_MAP_DELETE_ELEM,

	/* lookup key in a given map and return next key
	 * err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size)
	 * Using attr->map_fd, attr->key, attr->next_key
	 * returns zero and stores next key or negative error
	 */
	BPF_MAP_GET_NEXT_KEY,
};
};


enum bpf_map_type {
enum bpf_map_type {
@@ -83,6 +112,15 @@ union bpf_attr {
		__u32	value_size;	/* size of value in bytes */
		__u32	value_size;	/* size of value in bytes */
		__u32	max_entries;	/* max number of entries in a map */
		__u32	max_entries;	/* max number of entries in a map */
	};
	};

	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
		__u32		map_fd;
		__aligned_u64	key;
		union {
			__aligned_u64 value;
			__aligned_u64 next_key;
		};
	};
} __attribute__((aligned(8)));
} __attribute__((aligned(8)));


#endif /* _UAPI__LINUX_BPF_H__ */
#endif /* _UAPI__LINUX_BPF_H__ */
+235 −0
Original line number Original line Diff line number Diff line
@@ -13,6 +13,7 @@
#include <linux/syscalls.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/slab.h>
#include <linux/anon_inodes.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>


static LIST_HEAD(bpf_map_types);
static LIST_HEAD(bpf_map_types);


@@ -111,6 +112,228 @@ static int map_create(union bpf_attr *attr)
	return err;
	return err;
}
}


/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *bpf_map_get(struct fd f)
{
	struct bpf_map *map;

	if (!f.file)
		return ERR_PTR(-EBADF);

	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	map = f.file->private_data;

	return map;
}

/* helper to convert user pointers passed inside __aligned_u64 fields */
static void __user *u64_to_ptr(__u64 val)
{
	return (void __user *) (unsigned long) val;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct fd f = fdget(ufd);
	struct bpf_map *map;
	void *key, *value;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	map = bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ESRCH;
	rcu_read_lock();
	value = map->ops->map_lookup_elem(map, key);
	if (!value)
		goto err_unlock;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, map->value_size) != 0)
		goto err_unlock;

	err = 0;

err_unlock:
	rcu_read_unlock();
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD value

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct fd f = fdget(ufd);
	struct bpf_map *map;
	void *key, *value;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	map = bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	value = kmalloc(map->value_size, GFP_USER);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, map->value_size) != 0)
		goto free_value;

	/* eBPF program that use maps are running under rcu_read_lock(),
	 * therefore all map accessors rely on this fact, so do the same here
	 */
	rcu_read_lock();
	err = map->ops->map_update_elem(map, key, value);
	rcu_read_unlock();

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	int ufd = attr->map_fd;
	struct fd f = fdget(ufd);
	struct bpf_map *map;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	map = bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();

free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *unext_key = u64_to_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct fd f = fdget(ufd);
	struct bpf_map *map;
	void *key, *next_key;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	map = bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
{
	union bpf_attr attr = {};
	union bpf_attr attr = {};
@@ -160,6 +383,18 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
	case BPF_MAP_CREATE:
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		err = map_create(&attr);
		break;
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	default:
	default:
		err = -EINVAL;
		err = -EINVAL;
		break;
		break;