Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit cfe4a9dd authored by Thomas Graf's avatar Thomas Graf Committed by David S. Miller
Browse files

nftables: Convert nft_hash to use generic rhashtable



The sizing of the hash table and the practice of requiring a lookup
to retrieve the pprev to be stored in the element cookie before the
deletion of an entry is left intact.

Signed-off-by: default avatarThomas Graf <tgraf@suug.ch>
Acked-by: default avatarPatrick McHardy <kaber@trash.net>
Reviewed-by: default avatarNikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent e341694e
Loading
Loading
Loading
Loading
+55 −236
Original line number Diff line number Diff line
@@ -15,209 +15,40 @@
#include <linux/log2.h>
#include <linux/jhash.h>
#include <linux/netlink.h>
#include <linux/vmalloc.h>
#include <linux/rhashtable.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>

#define NFT_HASH_MIN_SIZE	4UL

struct nft_hash {
	struct nft_hash_table __rcu	*tbl;
};

struct nft_hash_table {
	unsigned int			size;
	struct nft_hash_elem __rcu	*buckets[];
};
/* We target a hash table size of 4, element hint is 75% of final size */
#define NFT_HASH_ELEMENT_HINT 3

struct nft_hash_elem {
	struct nft_hash_elem __rcu	*next;
	struct rhash_head		node;
	struct nft_data			key;
	struct nft_data			data[];
};

#define nft_hash_for_each_entry(i, head) \
	for (i = nft_dereference(head); i != NULL; i = nft_dereference(i->next))
#define nft_hash_for_each_entry_rcu(i, head) \
	for (i = rcu_dereference(head); i != NULL; i = rcu_dereference(i->next))

static u32 nft_hash_rnd __read_mostly;
static bool nft_hash_rnd_initted __read_mostly;

static unsigned int nft_hash_data(const struct nft_data *data,
				  unsigned int hsize, unsigned int len)
{
	unsigned int h;

	h = jhash(data->data, len, nft_hash_rnd);
	return h & (hsize - 1);
}

static bool nft_hash_lookup(const struct nft_set *set,
			    const struct nft_data *key,
			    struct nft_data *data)
{
	const struct nft_hash *priv = nft_set_priv(set);
	const struct nft_hash_table *tbl = rcu_dereference(priv->tbl);
	const struct rhashtable *priv = nft_set_priv(set);
	const struct nft_hash_elem *he;
	unsigned int h;

	h = nft_hash_data(key, tbl->size, set->klen);
	nft_hash_for_each_entry_rcu(he, tbl->buckets[h]) {
		if (nft_data_cmp(&he->key, key, set->klen))
			continue;
		if (set->flags & NFT_SET_MAP)
	he = rhashtable_lookup(priv, key);
	if (he && set->flags & NFT_SET_MAP)
		nft_data_copy(data, he->data);
		return true;
	}
	return false;
}

static void nft_hash_tbl_free(const struct nft_hash_table *tbl)
{
	kvfree(tbl);
}

static unsigned int nft_hash_tbl_size(unsigned int nelem)
{
	return max(roundup_pow_of_two(nelem * 4 / 3), NFT_HASH_MIN_SIZE);
}

static struct nft_hash_table *nft_hash_tbl_alloc(unsigned int nbuckets)
{
	struct nft_hash_table *tbl;
	size_t size;

	size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
	tbl = kzalloc(size, GFP_KERNEL | __GFP_REPEAT | __GFP_NOWARN);
	if (tbl == NULL)
		tbl = vzalloc(size);
	if (tbl == NULL)
		return NULL;
	tbl->size = nbuckets;

	return tbl;
}

static void nft_hash_chain_unzip(const struct nft_set *set,
				 const struct nft_hash_table *ntbl,
				 struct nft_hash_table *tbl, unsigned int n)
{
	struct nft_hash_elem *he, *last, *next;
	unsigned int h;

	he = nft_dereference(tbl->buckets[n]);
	if (he == NULL)
		return;
	h = nft_hash_data(&he->key, ntbl->size, set->klen);

	/* Find last element of first chain hashing to bucket h */
	last = he;
	nft_hash_for_each_entry(he, he->next) {
		if (nft_hash_data(&he->key, ntbl->size, set->klen) != h)
			break;
		last = he;
	}

	/* Unlink first chain from the old table */
	RCU_INIT_POINTER(tbl->buckets[n], last->next);

	/* If end of chain reached, done */
	if (he == NULL)
		return;

	/* Find first element of second chain hashing to bucket h */
	next = NULL;
	nft_hash_for_each_entry(he, he->next) {
		if (nft_hash_data(&he->key, ntbl->size, set->klen) != h)
			continue;
		next = he;
		break;
	}

	/* Link the two chains */
	RCU_INIT_POINTER(last->next, next);
}

static int nft_hash_tbl_expand(const struct nft_set *set, struct nft_hash *priv)
{
	struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl;
	struct nft_hash_elem *he;
	unsigned int i, h;
	bool complete;

	ntbl = nft_hash_tbl_alloc(tbl->size * 2);
	if (ntbl == NULL)
		return -ENOMEM;

	/* Link new table's buckets to first element in the old table
	 * hashing to the new bucket.
	 */
	for (i = 0; i < ntbl->size; i++) {
		h = i < tbl->size ? i : i - tbl->size;
		nft_hash_for_each_entry(he, tbl->buckets[h]) {
			if (nft_hash_data(&he->key, ntbl->size, set->klen) != i)
				continue;
			RCU_INIT_POINTER(ntbl->buckets[i], he);
			break;
		}
	}

	/* Publish new table */
	rcu_assign_pointer(priv->tbl, ntbl);

	/* Unzip interleaved hash chains */
	do {
		/* Wait for readers to use new table/unzipped chains */
		synchronize_rcu();

		complete = true;
		for (i = 0; i < tbl->size; i++) {
			nft_hash_chain_unzip(set, ntbl, tbl, i);
			if (tbl->buckets[i] != NULL)
				complete = false;
		}
	} while (!complete);

	nft_hash_tbl_free(tbl);
	return 0;
}

static int nft_hash_tbl_shrink(const struct nft_set *set, struct nft_hash *priv)
{
	struct nft_hash_table *tbl = nft_dereference(priv->tbl), *ntbl;
	struct nft_hash_elem __rcu **pprev;
	unsigned int i;

	ntbl = nft_hash_tbl_alloc(tbl->size / 2);
	if (ntbl == NULL)
		return -ENOMEM;

	for (i = 0; i < ntbl->size; i++) {
		ntbl->buckets[i] = tbl->buckets[i];

		for (pprev = &ntbl->buckets[i]; *pprev != NULL;
		     pprev = &nft_dereference(*pprev)->next)
			;
		RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]);
	}

	/* Publish new table */
	rcu_assign_pointer(priv->tbl, ntbl);
	synchronize_rcu();

	nft_hash_tbl_free(tbl);
	return 0;
	return !!he;
}

static int nft_hash_insert(const struct nft_set *set,
			   const struct nft_set_elem *elem)
{
	struct nft_hash *priv = nft_set_priv(set);
	struct nft_hash_table *tbl = nft_dereference(priv->tbl);
	struct rhashtable *priv = nft_set_priv(set);
	struct nft_hash_elem *he;
	unsigned int size, h;
	unsigned int size;

	if (elem->flags != 0)
		return -EINVAL;
@@ -234,13 +65,7 @@ static int nft_hash_insert(const struct nft_set *set,
	if (set->flags & NFT_SET_MAP)
		nft_data_copy(he->data, &elem->data);

	h = nft_hash_data(&he->key, tbl->size, set->klen);
	RCU_INIT_POINTER(he->next, tbl->buckets[h]);
	rcu_assign_pointer(tbl->buckets[h], he);

	/* Expand table when exceeding 75% load */
	if (set->nelems + 1 > tbl->size / 4 * 3)
		nft_hash_tbl_expand(set, priv);
	rhashtable_insert(priv, &he->node, GFP_KERNEL);

	return 0;
}
@@ -257,36 +82,31 @@ static void nft_hash_elem_destroy(const struct nft_set *set,
static void nft_hash_remove(const struct nft_set *set,
			    const struct nft_set_elem *elem)
{
	struct nft_hash *priv = nft_set_priv(set);
	struct nft_hash_table *tbl = nft_dereference(priv->tbl);
	struct nft_hash_elem *he, __rcu **pprev;
	struct rhashtable *priv = nft_set_priv(set);
	struct rhash_head *he, __rcu **pprev;

	pprev = elem->cookie;
	he = nft_dereference((*pprev));
	he = rht_dereference((*pprev), priv);

	rhashtable_remove_pprev(priv, he, pprev, GFP_KERNEL);

	RCU_INIT_POINTER(*pprev, he->next);
	synchronize_rcu();
	kfree(he);

	/* Shrink table beneath 30% load */
	if (set->nelems - 1 < tbl->size * 3 / 10 &&
	    tbl->size > NFT_HASH_MIN_SIZE)
		nft_hash_tbl_shrink(set, priv);
}

static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
{
	const struct nft_hash *priv = nft_set_priv(set);
	const struct nft_hash_table *tbl = nft_dereference(priv->tbl);
	struct nft_hash_elem __rcu * const *pprev;
	const struct rhashtable *priv = nft_set_priv(set);
	const struct bucket_table *tbl = rht_dereference_rcu(priv->tbl, priv);
	struct rhash_head __rcu * const *pprev;
	struct nft_hash_elem *he;
	unsigned int h;
	u32 h;

	h = nft_hash_data(&elem->key, tbl->size, set->klen);
	h = rhashtable_hashfn(priv, &elem->key, set->klen);
	pprev = &tbl->buckets[h];
	nft_hash_for_each_entry(he, tbl->buckets[h]) {
	rht_for_each_entry_rcu(he, tbl->buckets[h], node) {
		if (nft_data_cmp(&he->key, &elem->key, set->klen)) {
			pprev = &he->next;
			pprev = &he->node.next;
			continue;
		}

@@ -302,14 +122,15 @@ static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
			  struct nft_set_iter *iter)
{
	const struct nft_hash *priv = nft_set_priv(set);
	const struct nft_hash_table *tbl = nft_dereference(priv->tbl);
	const struct rhashtable *priv = nft_set_priv(set);
	const struct bucket_table *tbl;
	const struct nft_hash_elem *he;
	struct nft_set_elem elem;
	unsigned int i;

	tbl = rht_dereference_rcu(priv->tbl, priv);
	for (i = 0; i < tbl->size; i++) {
		nft_hash_for_each_entry(he, tbl->buckets[i]) {
		rht_for_each_entry_rcu(he, tbl->buckets[i], node) {
			if (iter->count < iter->skip)
				goto cont;

@@ -329,48 +150,46 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,

static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
{
	return sizeof(struct nft_hash);
	return sizeof(struct rhashtable);
}

static int lockdep_nfnl_lock_is_held(void)
{
	return lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES);
}

static int nft_hash_init(const struct nft_set *set,
			 const struct nft_set_desc *desc,
			 const struct nlattr * const tb[])
{
	struct nft_hash *priv = nft_set_priv(set);
	struct nft_hash_table *tbl;
	unsigned int size;
	struct rhashtable *priv = nft_set_priv(set);
	struct rhashtable_params params = {
		.nelem_hint = desc->size ? : NFT_HASH_ELEMENT_HINT,
		.head_offset = offsetof(struct nft_hash_elem, node),
		.key_offset = offsetof(struct nft_hash_elem, key),
		.key_len = set->klen,
		.hashfn = jhash,
		.grow_decision = rht_grow_above_75,
		.shrink_decision = rht_shrink_below_30,
		.mutex_is_held = lockdep_nfnl_lock_is_held,
	};

	if (unlikely(!nft_hash_rnd_initted)) {
		get_random_bytes(&nft_hash_rnd, 4);
		nft_hash_rnd_initted = true;
	}

	size = NFT_HASH_MIN_SIZE;
	if (desc->size)
		size = nft_hash_tbl_size(desc->size);

	tbl = nft_hash_tbl_alloc(size);
	if (tbl == NULL)
		return -ENOMEM;
	RCU_INIT_POINTER(priv->tbl, tbl);
	return 0;
	return rhashtable_init(priv, &params);
}

static void nft_hash_destroy(const struct nft_set *set)
{
	const struct nft_hash *priv = nft_set_priv(set);
	const struct nft_hash_table *tbl = nft_dereference(priv->tbl);
	const struct rhashtable *priv = nft_set_priv(set);
	const struct bucket_table *tbl;
	struct nft_hash_elem *he, *next;
	unsigned int i;

	for (i = 0; i < tbl->size; i++) {
		for (he = nft_dereference(tbl->buckets[i]); he != NULL;
		     he = next) {
			next = nft_dereference(he->next);
	tbl = rht_dereference(priv->tbl, priv);
	for (i = 0; i < tbl->size; i++)
		rht_for_each_entry_safe(he, next, tbl->buckets[i], priv, node)
			nft_hash_elem_destroy(set, he);
		}
	}
	kfree(tbl);

	rhashtable_destroy(priv);
}

static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
@@ -383,8 +202,8 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
		esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]);

	if (desc->size) {
		est->size = sizeof(struct nft_hash) +
			    nft_hash_tbl_size(desc->size) *
		est->size = sizeof(struct rhashtable) +
			    roundup_pow_of_two(desc->size * 4 / 3) *
			    sizeof(struct nft_hash_elem *) +
			    desc->size * esize;
	} else {