Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 60175ccd authored by David S. Miller's avatar David S. Miller
Browse files


Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for your net-next
tree.  Most relevant updates are the removal of per-conntrack timers to
use a workqueue/garbage collection approach instead from Florian
Westphal, the hash and numgen expression for nf_tables from Laura
Garcia, updates on nf_tables hash set to honor the NLM_F_EXCL flag,
removal of ip_conntrack sysctl and many other incremental updates on our
Netfilter codebase.

More specifically, they are:

1) Retrieve only 4 bytes to fetch ports in case of non-linear skb
   transport area in dccp, sctp, tcp, udp and udplite protocol
   conntrackers, from Gao Feng.

2) Missing whitespace on error message in physdev match, from Hangbin Liu.

3) Skip redundant IPv4 checksum calculation in nf_dup_ipv4, from Liping Zhang.

4) Add nf_ct_expires() helper function and use it, from Florian Westphal.

5) Replace opencoded nf_ct_kill() call in IPVS conntrack support, also
   from Florian.

6) Rename nf_tables set implementation to nft_set_{name}.c

7) Introduce the hash expression to allow arbitrary hashing of selector
   concatenations, from Laura Garcia Liebana.

8) Remove ip_conntrack sysctl backward compatibility code, this code has
   been around for long time already, and we have two interfaces to do
   this already: nf_conntrack sysctl and ctnetlink.

9) Use nf_conntrack_get_ht() helper function whenever possible, instead
   of opencoding fetch of hashtable pointer and size, patch from Liping Zhang.

10) Add quota expression for nf_tables.

11) Add number generator expression for nf_tables, this supports
    incremental and random generators that can be combined with maps,
    very useful for load balancing purpose, again from Laura Garcia Liebana.

12) Fix a typo in a debug message in FTP conntrack helper, from Colin Ian King.

13) Introduce a nft_chain_parse_hook() helper function to parse chain hook
    configuration, this is used by a follow up patch to perform better chain
    update validation.

14) Add rhashtable_lookup_get_insert_key() to rhashtable and use it from the
    nft_set_hash implementation to honor the NLM_F_EXCL flag.

15) Missing nulls check in nf_conntrack from nf_conntrack_tuple_taken(),
    patch from Florian Westphal.

16) Don't use the DYING bit to know if the conntrack event has been already
    delivered, instead a state variable to track event re-delivery
    states, also from Florian.

17) Remove the per-conntrack timer, use the workqueue approach that was
    discussed during the NFWS, from Florian Westphal.

18) Use the netlink conntrack table dump path to kill stale entries,
    again from Florian.

19) Add a garbage collector to get rid of stale conntracks, from
    Florian.

20) Reschedule garbage collector if eviction rate is high.

21) Get rid of the __nf_ct_kill_acct() helper.

22) Use ARPHRD_ETHER instead of hardcoded 1 from ARP logger.

23) Make nf_log_set() interface assertive on unsupported families.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 2f5281ba 779994fa
Loading
Loading
Loading
Loading
+57 −13
Original line number Diff line number Diff line
@@ -343,7 +343,8 @@ int rhashtable_init(struct rhashtable *ht,
struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
					    const void *key,
					    struct rhash_head *obj,
					    struct bucket_table *old_tbl);
					    struct bucket_table *old_tbl,
					    void **data);
int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl);

void rhashtable_walk_enter(struct rhashtable *ht,
@@ -563,8 +564,11 @@ static inline void *rhashtable_lookup_fast(
	return NULL;
}

/* Internal function, please use rhashtable_insert_fast() instead */
static inline int __rhashtable_insert_fast(
/* Internal function, please use rhashtable_insert_fast() instead. This
 * function returns the existing element already in hashes in there is a clash,
 * otherwise it returns an error via ERR_PTR().
 */
static inline void *__rhashtable_insert_fast(
	struct rhashtable *ht, const void *key, struct rhash_head *obj,
	const struct rhashtable_params params)
{
@@ -577,6 +581,7 @@ static inline int __rhashtable_insert_fast(
	spinlock_t *lock;
	unsigned int elasticity;
	unsigned int hash;
	void *data = NULL;
	int err;

restart:
@@ -601,11 +606,14 @@ static inline int __rhashtable_insert_fast(

	new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
	if (unlikely(new_tbl)) {
		tbl = rhashtable_insert_slow(ht, key, obj, new_tbl);
		tbl = rhashtable_insert_slow(ht, key, obj, new_tbl, &data);
		if (!IS_ERR_OR_NULL(tbl))
			goto slow_path;

		err = PTR_ERR(tbl);
		if (err == -EEXIST)
			err = 0;

		goto out;
	}

@@ -619,25 +627,25 @@ static inline int __rhashtable_insert_fast(
		err = rhashtable_insert_rehash(ht, tbl);
		rcu_read_unlock();
		if (err)
			return err;
			return ERR_PTR(err);

		goto restart;
	}

	err = -EEXIST;
	err = 0;
	elasticity = ht->elasticity;
	rht_for_each(head, tbl, hash) {
		if (key &&
		    unlikely(!(params.obj_cmpfn ?
			       params.obj_cmpfn(&arg, rht_obj(ht, head)) :
			       rhashtable_compare(&arg, rht_obj(ht, head)))))
			       rhashtable_compare(&arg, rht_obj(ht, head))))) {
			data = rht_obj(ht, head);
			goto out;
		}
		if (!--elasticity)
			goto slow_path;
	}

	err = 0;

	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);

	RCU_INIT_POINTER(obj->next, head);
@@ -652,7 +660,7 @@ static inline int __rhashtable_insert_fast(
	spin_unlock_bh(lock);
	rcu_read_unlock();

	return err;
	return err ? ERR_PTR(err) : data;
}

/**
@@ -675,7 +683,13 @@ static inline int rhashtable_insert_fast(
	struct rhashtable *ht, struct rhash_head *obj,
	const struct rhashtable_params params)
{
	return __rhashtable_insert_fast(ht, NULL, obj, params);
	void *ret;

	ret = __rhashtable_insert_fast(ht, NULL, obj, params);
	if (IS_ERR(ret))
		return PTR_ERR(ret);

	return ret == NULL ? 0 : -EEXIST;
}

/**
@@ -704,11 +718,15 @@ static inline int rhashtable_lookup_insert_fast(
	const struct rhashtable_params params)
{
	const char *key = rht_obj(ht, obj);
	void *ret;

	BUG_ON(ht->p.obj_hashfn);

	return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj,
					params);
	ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params);
	if (IS_ERR(ret))
		return PTR_ERR(ret);

	return ret == NULL ? 0 : -EEXIST;
}

/**
@@ -736,6 +754,32 @@ static inline int rhashtable_lookup_insert_fast(
static inline int rhashtable_lookup_insert_key(
	struct rhashtable *ht, const void *key, struct rhash_head *obj,
	const struct rhashtable_params params)
{
	void *ret;

	BUG_ON(!ht->p.obj_hashfn || !key);

	ret = __rhashtable_insert_fast(ht, key, obj, params);
	if (IS_ERR(ret))
		return PTR_ERR(ret);

	return ret == NULL ? 0 : -EEXIST;
}

/**
 * rhashtable_lookup_get_insert_key - lookup and insert object into hash table
 * @ht:		hash table
 * @obj:	pointer to hash head inside object
 * @params:	hash table parameters
 * @data:	pointer to element data already in hashes
 *
 * Just like rhashtable_lookup_insert_key(), but this function returns the
 * object if it exists, NULL if it does not and the insertion was successful,
 * and an ERR_PTR otherwise.
 */
static inline void *rhashtable_lookup_get_insert_key(
	struct rhashtable *ht, const void *key, struct rhash_head *obj,
	const struct rhashtable_params params)
{
	BUG_ON(!ht->p.obj_hashfn || !key);

+41 −15
Original line number Diff line number Diff line
@@ -42,7 +42,6 @@ union nf_conntrack_expect_proto {

#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/timer.h>

#ifdef CONFIG_NETFILTER_DEBUG
#define NF_CT_ASSERT(x)		WARN_ON(!(x))
@@ -73,7 +72,7 @@ struct nf_conn_help {
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>

struct nf_conn {
	/* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
	/* Usage count in here is 1 for hash table, 1 per skb,
	 * plus 1 for any connection(s) we are `master' for
	 *
	 * Hint, SKB address this struct and refcnt via skb->nfct and
@@ -96,8 +95,8 @@ struct nf_conn {
	/* Have we seen traffic both ways yet? (bitset) */
	unsigned long status;

	/* Timer function; drops refcnt when it goes off. */
	struct timer_list timeout;
	/* jiffies32 when this ct is considered dead */
	u32 timeout;

	possible_net_t ct_net;

@@ -220,21 +219,14 @@ static inline void nf_ct_refresh(struct nf_conn *ct,
	__nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0);
}

bool __nf_ct_kill_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
		       const struct sk_buff *skb, int do_acct);

/* kill conntrack and do accounting */
static inline bool nf_ct_kill_acct(struct nf_conn *ct,
				   enum ip_conntrack_info ctinfo,
				   const struct sk_buff *skb)
{
	return __nf_ct_kill_acct(ct, ctinfo, skb, 1);
}
bool nf_ct_kill_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
		     const struct sk_buff *skb);

/* kill conntrack without accounting */
static inline bool nf_ct_kill(struct nf_conn *ct)
{
	return __nf_ct_kill_acct(ct, 0, NULL, 0);
	return nf_ct_delete(ct, 0, 0);
}

/* These are for NAT.  Icky. */
@@ -291,21 +283,55 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb)
	return skb->dev && skb->skb_iif && skb->dev->flags & IFF_LOOPBACK;
}

#define nfct_time_stamp ((u32)(jiffies))

/* jiffies until ct expires, 0 if already expired */
static inline unsigned long nf_ct_expires(const struct nf_conn *ct)
{
	long timeout = (long)ct->timeout.expires - (long)jiffies;
	s32 timeout = ct->timeout - nfct_time_stamp;

	return timeout > 0 ? timeout : 0;
}

static inline bool nf_ct_is_expired(const struct nf_conn *ct)
{
	return (__s32)(ct->timeout - nfct_time_stamp) <= 0;
}

/* use after obtaining a reference count */
static inline bool nf_ct_should_gc(const struct nf_conn *ct)
{
	return nf_ct_is_expired(ct) && nf_ct_is_confirmed(ct) &&
	       !nf_ct_is_dying(ct);
}

struct kernel_param;

int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
int nf_conntrack_hash_resize(unsigned int hashsize);

extern struct hlist_nulls_head *nf_conntrack_hash;
extern unsigned int nf_conntrack_htable_size;
extern seqcount_t nf_conntrack_generation;
extern unsigned int nf_conntrack_max;

/* must be called with rcu read lock held */
static inline void
nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize)
{
	struct hlist_nulls_head *hptr;
	unsigned int sequence, hsz;

	do {
		sequence = read_seqcount_begin(&nf_conntrack_generation);
		hsz = nf_conntrack_htable_size;
		hptr = nf_conntrack_hash;
	} while (read_seqcount_retry(&nf_conntrack_generation, sequence));

	*hash = hptr;
	*hsize = hsz;
}

struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
				 const struct nf_conntrack_zone *zone,
				 gfp_t flags);
+0 −3
Original line number Diff line number Diff line
@@ -51,8 +51,6 @@ bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
			const struct nf_conntrack_l3proto *l3proto,
			const struct nf_conntrack_l4proto *l4proto);

void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize);

/* Find a connection corresponding to a tuple. */
struct nf_conntrack_tuple_hash *
nf_conntrack_find_get(struct net *net,
@@ -83,7 +81,6 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,

#define CONNTRACK_LOCKS 1024

extern struct hlist_nulls_head *nf_conntrack_hash;
extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
void nf_conntrack_lock(spinlock_t *lock);

+12 −5
Original line number Diff line number Diff line
@@ -12,12 +12,19 @@
#include <linux/netfilter/nf_conntrack_tuple_common.h>
#include <net/netfilter/nf_conntrack_extend.h>

enum nf_ct_ecache_state {
	NFCT_ECACHE_UNKNOWN,		/* destroy event not sent */
	NFCT_ECACHE_DESTROY_FAIL,	/* tried but failed to send destroy event */
	NFCT_ECACHE_DESTROY_SENT,	/* sent destroy event after failure */
};

struct nf_conntrack_ecache {
	unsigned long cache;		/* bitops want long */
	unsigned long missed;		/* missed events */
	u16 ctmask;			/* bitmask of ct events to be delivered */
	u16 expmask;			/* bitmask of expect events to be delivered */
	u32 portid;			/* netlink portid of destroyer */
	enum nf_ct_ecache_state state;	/* ecache state */
};

static inline struct nf_conntrack_ecache *
+0 −8
Original line number Diff line number Diff line
@@ -134,14 +134,6 @@ void nf_ct_l4proto_pernet_unregister(struct net *net,
int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto);
void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto);

static inline void nf_ct_kfree_compat_sysctl_table(struct nf_proto_net *pn)
{
#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
	kfree(pn->ctl_compat_table);
	pn->ctl_compat_table = NULL;
#endif
}

/* Generic netlink helpers */
int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
			       const struct nf_conntrack_tuple *tuple);
Loading