Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8ddbb312 authored by David S. Miller
Browse files

Merge branch 'bpf-htab-fixes'



Alexei Starovoitov says:

====================
bpf: htab fixes

Two bpf hashtable fixes. See individual patches for details.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 745cb7f8 4fe84359
Loading
Loading
Loading
Loading
+5 −0
Original line number Original line Diff line number Diff line
@@ -29,6 +29,11 @@ struct hlist_nulls_node {
	((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls))
	((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls))


#define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member)
#define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member)

/**
 * hlist_nulls_entry_safe - get the struct for this node, or NULL at list end
 * @ptr:	the &struct hlist_nulls_node pointer; may be a nulls marker.
 * @type:	the type of the struct @ptr is embedded in.
 * @member:	the name of the hlist_nulls_node within the struct.
 *
 * @ptr is evaluated exactly once (it is copied into a local first), so it
 * is safe to pass an expression with side effects.  When the copied value
 * is a nulls marker the result is NULL; otherwise it is the enclosing
 * @type object as computed by hlist_nulls_entry().
 *
 * Relies on the GNU statement-expression and typeof extensions.
 */
#define hlist_nulls_entry_safe(ptr, type, member) \
	({ typeof(ptr) ____ptr = (ptr); \
	   !is_a_nulls(____ptr) ? hlist_nulls_entry(____ptr, type, member) : NULL; \
	})
/**
/**
 * ptr_is_a_nulls - Test if a ptr is a nulls
 * ptr_is_a_nulls - Test if a ptr is a nulls
 * @ptr: ptr to be tested
 * @ptr: ptr to be tested
+14 −0
Original line number Original line Diff line number Diff line
@@ -156,5 +156,19 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
		({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
		({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
		pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)))
		pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)))


/**
 * hlist_nulls_for_each_entry_safe -
 *   iterate over list of given type safe against removal of list entry
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct hlist_nulls_node to use as a loop cursor.
 * @head:	the head for your list.
 * @member:	the name of the hlist_nulls_node within the struct.
 *
 * The loop condition first computes @tpos from @pos and then immediately
 * advances @pos to the following node, all before the loop body runs.
 * The increment clause of the for statement is intentionally empty.
 * Consequently, inside the body @pos refers to the successor of @tpos
 * (or to the terminating nulls marker), not to the node embedded in
 * @tpos, and the body may unlink @tpos without disturbing the walk.
 *
 * A barrier() is issued once before the first node is loaded, and both
 * the first and the next pointers are read with rcu_dereference_raw().
 * When the loop terminates normally, @pos holds the nulls marker that
 * ended the list.
 */
#define hlist_nulls_for_each_entry_safe(tpos, pos, head, member)		\
	for (({barrier();}),							\
	     pos = rcu_dereference_raw(hlist_nulls_first_rcu(head));		\
		(!is_a_nulls(pos)) &&						\
		({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member);	\
		   pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)); 1; });)
#endif
#endif
#endif
#endif
+80 −39
Original line number Original line Diff line number Diff line
@@ -13,11 +13,12 @@
#include <linux/bpf.h>
#include <linux/bpf.h>
#include <linux/jhash.h>
#include <linux/jhash.h>
#include <linux/filter.h>
#include <linux/filter.h>
#include <linux/rculist_nulls.h>
#include "percpu_freelist.h"
#include "percpu_freelist.h"
#include "bpf_lru_list.h"
#include "bpf_lru_list.h"


struct bucket {
struct bucket {
	struct hlist_head head;
	struct hlist_nulls_head head;
	raw_spinlock_t lock;
	raw_spinlock_t lock;
};
};


@@ -44,10 +45,15 @@ enum extra_elem_state {
/* each htab element is struct htab_elem + key + value */
/* each htab element is struct htab_elem + key + value */
struct htab_elem {
struct htab_elem {
	union {
	union {
		struct hlist_node hash_node;
		struct hlist_nulls_node hash_node;
		struct {
			void *padding;
			union {
				struct bpf_htab *htab;
				struct bpf_htab *htab;
				struct pcpu_freelist_node fnode;
				struct pcpu_freelist_node fnode;
			};
			};
		};
	};
	union {
	union {
		struct rcu_head rcu;
		struct rcu_head rcu;
		enum extra_elem_state state;
		enum extra_elem_state state;
@@ -162,7 +168,8 @@ static int prealloc_init(struct bpf_htab *htab)
				 offsetof(struct htab_elem, lru_node),
				 offsetof(struct htab_elem, lru_node),
				 htab->elem_size, htab->map.max_entries);
				 htab->elem_size, htab->map.max_entries);
	else
	else
		pcpu_freelist_populate(&htab->freelist, htab->elems,
		pcpu_freelist_populate(&htab->freelist,
				       htab->elems + offsetof(struct htab_elem, fnode),
				       htab->elem_size, htab->map.max_entries);
				       htab->elem_size, htab->map.max_entries);


	return 0;
	return 0;
@@ -217,6 +224,11 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
	int err, i;
	int err, i;
	u64 cost;
	u64 cost;


	BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
		     offsetof(struct htab_elem, hash_node.pprev));
	BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
		     offsetof(struct htab_elem, hash_node.pprev));

	if (lru && !capable(CAP_SYS_ADMIN))
	if (lru && !capable(CAP_SYS_ADMIN))
		/* LRU implementation is much more complicated than other
		/* LRU implementation is much more complicated than other
		 * maps.  Hence, limit to CAP_SYS_ADMIN for now.
		 * maps.  Hence, limit to CAP_SYS_ADMIN for now.
@@ -326,7 +338,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
		goto free_htab;
		goto free_htab;


	for (i = 0; i < htab->n_buckets; i++) {
	for (i = 0; i < htab->n_buckets; i++) {
		INIT_HLIST_HEAD(&htab->buckets[i].head);
		INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
		raw_spin_lock_init(&htab->buckets[i].lock);
		raw_spin_lock_init(&htab->buckets[i].lock);
	}
	}


@@ -366,28 +378,52 @@ static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
	return &htab->buckets[hash & (htab->n_buckets - 1)];
	return &htab->buckets[hash & (htab->n_buckets - 1)];
}
}


static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash)
{
{
	return &__select_bucket(htab, hash)->head;
	return &__select_bucket(htab, hash)->head;
}
}


static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
/* this lookup function can only be called with bucket lock taken */
static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
					 void *key, u32 key_size)
					 void *key, u32 key_size)
{
{
	struct hlist_nulls_node *n;
	struct htab_elem *l;
	struct htab_elem *l;


	hlist_for_each_entry_rcu(l, head, hash_node)
	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		if (l->hash == hash && !memcmp(&l->key, key, key_size))
		if (l->hash == hash && !memcmp(&l->key, key, key_size))
			return l;
			return l;


	return NULL;
	return NULL;
}
}


/* Lockless variant of the bucket lookup: the bucket lock does not need
 * to be held.  An element may be moved to a different bucket while the
 * chain is being walked, in which case the walk ends on the nulls marker
 * of some other bucket.  That is detected by comparing the marker value
 * with the index of the bucket this hash maps to, and the walk is then
 * restarted from the head.
 *
 * Returns the matching element, or NULL if the key is not in the bucket.
 */
static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
					       u32 hash, void *key,
					       u32 key_size, u32 n_buckets)
{
	const u32 bucket_idx = hash & (n_buckets - 1);
	struct hlist_nulls_node *pos;
	struct htab_elem *elem;

	do {
		hlist_nulls_for_each_entry_rcu(elem, pos, head, hash_node) {
			if (elem->hash == hash &&
			    memcmp(&elem->key, key, key_size) == 0)
				return elem;
		}
		/* pos now holds the nulls marker that terminated the walk;
		 * retry if it belongs to a different bucket.
		 */
	} while (unlikely(get_nulls_value(pos) != bucket_idx));

	return NULL;
}

/* Called from syscall or from eBPF program */
/* Called from syscall or from eBPF program */
static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
{
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_head *head;
	struct hlist_nulls_head *head;
	struct htab_elem *l;
	struct htab_elem *l;
	u32 hash, key_size;
	u32 hash, key_size;


@@ -400,7 +436,7 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)


	head = select_bucket(htab, hash);
	head = select_bucket(htab, hash);


	l = lookup_elem_raw(head, hash, key, key_size);
	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);


	return l;
	return l;
}
}
@@ -433,8 +469,9 @@ static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
{
{
	struct bpf_htab *htab = (struct bpf_htab *)arg;
	struct bpf_htab *htab = (struct bpf_htab *)arg;
	struct htab_elem *l, *tgt_l;
	struct htab_elem *l = NULL, *tgt_l;
	struct hlist_head *head;
	struct hlist_nulls_head *head;
	struct hlist_nulls_node *n;
	unsigned long flags;
	unsigned long flags;
	struct bucket *b;
	struct bucket *b;


@@ -444,9 +481,9 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)


	raw_spin_lock_irqsave(&b->lock, flags);
	raw_spin_lock_irqsave(&b->lock, flags);


	hlist_for_each_entry_rcu(l, head, hash_node)
	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		if (l == tgt_l) {
		if (l == tgt_l) {
			hlist_del_rcu(&l->hash_node);
			hlist_nulls_del_rcu(&l->hash_node);
			break;
			break;
		}
		}


@@ -459,7 +496,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_head *head;
	struct hlist_nulls_head *head;
	struct htab_elem *l, *next_l;
	struct htab_elem *l, *next_l;
	u32 hash, key_size;
	u32 hash, key_size;
	int i;
	int i;
@@ -473,7 +510,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
	head = select_bucket(htab, hash);
	head = select_bucket(htab, hash);


	/* lookup the key */
	/* lookup the key */
	l = lookup_elem_raw(head, hash, key, key_size);
	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);


	if (!l) {
	if (!l) {
		i = 0;
		i = 0;
@@ -481,7 +518,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
	}
	}


	/* key was found, get next key in the same bucket */
	/* key was found, get next key in the same bucket */
	next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)),
	next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)),
				  struct htab_elem, hash_node);
				  struct htab_elem, hash_node);


	if (next_l) {
	if (next_l) {
@@ -500,7 +537,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
		head = select_bucket(htab, i);
		head = select_bucket(htab, i);


		/* pick first element in the bucket */
		/* pick first element in the bucket */
		next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
		next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)),
					  struct htab_elem, hash_node);
					  struct htab_elem, hash_node);
		if (next_l) {
		if (next_l) {
			/* if it's not empty, just return it */
			/* if it's not empty, just return it */
@@ -582,9 +619,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
	int err = 0;
	int err = 0;


	if (prealloc) {
	if (prealloc) {
		l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
		struct pcpu_freelist_node *l;
		if (!l_new)

		l = pcpu_freelist_pop(&htab->freelist);
		if (!l)
			err = -E2BIG;
			err = -E2BIG;
		else
			l_new = container_of(l, struct htab_elem, fnode);
	} else {
	} else {
		if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
		if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
			atomic_dec(&htab->count);
			atomic_dec(&htab->count);
@@ -661,7 +702,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
{
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new = NULL, *l_old;
	struct htab_elem *l_new = NULL, *l_old;
	struct hlist_head *head;
	struct hlist_nulls_head *head;
	unsigned long flags;
	unsigned long flags;
	struct bucket *b;
	struct bucket *b;
	u32 key_size, hash;
	u32 key_size, hash;
@@ -700,9 +741,9 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
	/* add new element to the head of the list, so that
	/* add new element to the head of the list, so that
	 * concurrent search will find it before old elem
	 * concurrent search will find it before old elem
	 */
	 */
	hlist_add_head_rcu(&l_new->hash_node, head);
	hlist_nulls_add_head_rcu(&l_new->hash_node, head);
	if (l_old) {
	if (l_old) {
		hlist_del_rcu(&l_old->hash_node);
		hlist_nulls_del_rcu(&l_old->hash_node);
		free_htab_elem(htab, l_old);
		free_htab_elem(htab, l_old);
	}
	}
	ret = 0;
	ret = 0;
@@ -716,7 +757,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
{
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new, *l_old = NULL;
	struct htab_elem *l_new, *l_old = NULL;
	struct hlist_head *head;
	struct hlist_nulls_head *head;
	unsigned long flags;
	unsigned long flags;
	struct bucket *b;
	struct bucket *b;
	u32 key_size, hash;
	u32 key_size, hash;
@@ -757,10 +798,10 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
	/* add new element to the head of the list, so that
	/* add new element to the head of the list, so that
	 * concurrent search will find it before old elem
	 * concurrent search will find it before old elem
	 */
	 */
	hlist_add_head_rcu(&l_new->hash_node, head);
	hlist_nulls_add_head_rcu(&l_new->hash_node, head);
	if (l_old) {
	if (l_old) {
		bpf_lru_node_set_ref(&l_new->lru_node);
		bpf_lru_node_set_ref(&l_new->lru_node);
		hlist_del_rcu(&l_old->hash_node);
		hlist_nulls_del_rcu(&l_old->hash_node);
	}
	}
	ret = 0;
	ret = 0;


@@ -781,7 +822,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
{
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new = NULL, *l_old;
	struct htab_elem *l_new = NULL, *l_old;
	struct hlist_head *head;
	struct hlist_nulls_head *head;
	unsigned long flags;
	unsigned long flags;
	struct bucket *b;
	struct bucket *b;
	u32 key_size, hash;
	u32 key_size, hash;
@@ -820,7 +861,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
			ret = PTR_ERR(l_new);
			ret = PTR_ERR(l_new);
			goto err;
			goto err;
		}
		}
		hlist_add_head_rcu(&l_new->hash_node, head);
		hlist_nulls_add_head_rcu(&l_new->hash_node, head);
	}
	}
	ret = 0;
	ret = 0;
err:
err:
@@ -834,7 +875,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
{
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new = NULL, *l_old;
	struct htab_elem *l_new = NULL, *l_old;
	struct hlist_head *head;
	struct hlist_nulls_head *head;
	unsigned long flags;
	unsigned long flags;
	struct bucket *b;
	struct bucket *b;
	u32 key_size, hash;
	u32 key_size, hash;
@@ -882,7 +923,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
	} else {
	} else {
		pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size),
		pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size),
				value, onallcpus);
				value, onallcpus);
		hlist_add_head_rcu(&l_new->hash_node, head);
		hlist_nulls_add_head_rcu(&l_new->hash_node, head);
		l_new = NULL;
		l_new = NULL;
	}
	}
	ret = 0;
	ret = 0;
@@ -910,7 +951,7 @@ static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
static int htab_map_delete_elem(struct bpf_map *map, void *key)
static int htab_map_delete_elem(struct bpf_map *map, void *key)
{
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_head *head;
	struct hlist_nulls_head *head;
	struct bucket *b;
	struct bucket *b;
	struct htab_elem *l;
	struct htab_elem *l;
	unsigned long flags;
	unsigned long flags;
@@ -930,7 +971,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
	l = lookup_elem_raw(head, hash, key, key_size);
	l = lookup_elem_raw(head, hash, key, key_size);


	if (l) {
	if (l) {
		hlist_del_rcu(&l->hash_node);
		hlist_nulls_del_rcu(&l->hash_node);
		free_htab_elem(htab, l);
		free_htab_elem(htab, l);
		ret = 0;
		ret = 0;
	}
	}
@@ -942,7 +983,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
{
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_head *head;
	struct hlist_nulls_head *head;
	struct bucket *b;
	struct bucket *b;
	struct htab_elem *l;
	struct htab_elem *l;
	unsigned long flags;
	unsigned long flags;
@@ -962,7 +1003,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
	l = lookup_elem_raw(head, hash, key, key_size);
	l = lookup_elem_raw(head, hash, key, key_size);


	if (l) {
	if (l) {
		hlist_del_rcu(&l->hash_node);
		hlist_nulls_del_rcu(&l->hash_node);
		ret = 0;
		ret = 0;
	}
	}


@@ -977,12 +1018,12 @@ static void delete_all_elements(struct bpf_htab *htab)
	int i;
	int i;


	for (i = 0; i < htab->n_buckets; i++) {
	for (i = 0; i < htab->n_buckets; i++) {
		struct hlist_head *head = select_bucket(htab, i);
		struct hlist_nulls_head *head = select_bucket(htab, i);
		struct hlist_node *n;
		struct hlist_nulls_node *n;
		struct htab_elem *l;
		struct htab_elem *l;


		hlist_for_each_entry_safe(l, n, head, hash_node) {
		hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
			hlist_del_rcu(&l->hash_node);
			hlist_nulls_del_rcu(&l->hash_node);
			if (l->state != HTAB_EXTRA_ELEM_USED)
			if (l->state != HTAB_EXTRA_ELEM_USED)
				htab_elem_free(htab, l);
				htab_elem_free(htab, l);
		}
		}