Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e86e180b authored by David S. Miller
Browse files


Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

The following patchset contains Netfilter/IPVS updates for net-next,
most relevantly they are:

* cleanup to remove double semicolon from Stephen Hemminger.

* calm down sparse warning in xt_ipcomp, from Fan Du.

* nf_ct_labels support for nf_tables, from Florian Westphal.

* new macros to simplify rcu dereferences in the scope of nfnetlink
  and nf_tables, from Patrick McHardy.

* Accept queue and drop (including reason for drop) to verdict
  parsing in nf_tables, also from Patrick.

* Remove unused random seed initialization in nfnetlink_log, from
  Florian Westphal.

* Allow to attach user-specific information to nf_tables rules, useful
  to attach user comments to rules, from me.

* Return errors in ipset according to the manpage documentation, from
  Jozsef Kadlecsik.

* Fix coccinelle warnings related to incorrect bool type usage for ipset,
  from Fengguang Wu.

* Add hash:ip,mark set type to ipset, from Vytas Dauksa.

* Fix the message printed by ipset for each netns that is created,
  from Ilia Mirkin.

* Add forceadd option to ipset, which evicts a random entry from the set
  if it becomes full, from Josh Hunt.

* Minor IPVS cleanups and fixes from Andi Kleen and Tingwei Liu.

* Improve conntrack scalability by removing a central spinlock, original
  work from Eric Dumazet. Jesper Dangaard Brouer took them over to address
  remaining issues. Several patches to prepare this change come in first
  place.

* Rework nft_hash to resolve bugs (leaking chain, missing rcu synchronization
  on element removal, etc.), from Patrick McHardy.

* Restore context in the rule deletion path, as we now release rule objects
  synchronously, from Patrick McHardy. This gets back event notification for
  anonymous sets.

* Fix NAT family validation in nft_nat, also from Patrick.

* Improve scalability of xt_connlimit by using an array of spinlocks and
  by introducing a rb-tree of hashtables for faster lookup of accounted
  objects per network. This patch was preceded by several patches and
  refactorizations to accommodate this change including the use of kmem_cache,
  from Florian Westphal.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents e7ef085d 7d084877
Loading
Loading
Loading
Loading
+11 −4
Original line number Original line Diff line number Diff line
@@ -39,11 +39,13 @@ enum ip_set_feature {
	IPSET_TYPE_NAME = (1 << IPSET_TYPE_NAME_FLAG),
	IPSET_TYPE_NAME = (1 << IPSET_TYPE_NAME_FLAG),
	IPSET_TYPE_IFACE_FLAG = 5,
	IPSET_TYPE_IFACE_FLAG = 5,
	IPSET_TYPE_IFACE = (1 << IPSET_TYPE_IFACE_FLAG),
	IPSET_TYPE_IFACE = (1 << IPSET_TYPE_IFACE_FLAG),
	IPSET_TYPE_NOMATCH_FLAG = 6,
	IPSET_TYPE_MARK_FLAG = 6,
	IPSET_TYPE_MARK = (1 << IPSET_TYPE_MARK_FLAG),
	IPSET_TYPE_NOMATCH_FLAG = 7,
	IPSET_TYPE_NOMATCH = (1 << IPSET_TYPE_NOMATCH_FLAG),
	IPSET_TYPE_NOMATCH = (1 << IPSET_TYPE_NOMATCH_FLAG),
	/* Strictly speaking not a feature, but a flag for dumping:
	/* Strictly speaking not a feature, but a flag for dumping:
	 * this settype must be dumped last */
	 * this settype must be dumped last */
	IPSET_DUMP_LAST_FLAG = 7,
	IPSET_DUMP_LAST_FLAG = 8,
	IPSET_DUMP_LAST = (1 << IPSET_DUMP_LAST_FLAG),
	IPSET_DUMP_LAST = (1 << IPSET_DUMP_LAST_FLAG),
};
};


@@ -63,6 +65,7 @@ enum ip_set_extension {
#define SET_WITH_TIMEOUT(s)	((s)->extensions & IPSET_EXT_TIMEOUT)
#define SET_WITH_TIMEOUT(s)	((s)->extensions & IPSET_EXT_TIMEOUT)
#define SET_WITH_COUNTER(s)	((s)->extensions & IPSET_EXT_COUNTER)
#define SET_WITH_COUNTER(s)	((s)->extensions & IPSET_EXT_COUNTER)
#define SET_WITH_COMMENT(s)	((s)->extensions & IPSET_EXT_COMMENT)
#define SET_WITH_COMMENT(s)	((s)->extensions & IPSET_EXT_COMMENT)
#define SET_WITH_FORCEADD(s)	((s)->flags & IPSET_CREATE_FLAG_FORCEADD)


/* Extension id, in size order */
/* Extension id, in size order */
enum ip_set_ext_id {
enum ip_set_ext_id {
@@ -171,8 +174,6 @@ struct ip_set_type {
	char name[IPSET_MAXNAMELEN];
	char name[IPSET_MAXNAMELEN];
	/* Protocol version */
	/* Protocol version */
	u8 protocol;
	u8 protocol;
	/* Set features to control swapping */
	u8 features;
	/* Set type dimension */
	/* Set type dimension */
	u8 dimension;
	u8 dimension;
	/*
	/*
@@ -182,6 +183,8 @@ struct ip_set_type {
	u8 family;
	u8 family;
	/* Type revisions */
	/* Type revisions */
	u8 revision_min, revision_max;
	u8 revision_min, revision_max;
	/* Set features to control swapping */
	u16 features;


	/* Create set */
	/* Create set */
	int (*create)(struct net *net, struct ip_set *set,
	int (*create)(struct net *net, struct ip_set *set,
@@ -217,6 +220,8 @@ struct ip_set {
	u8 revision;
	u8 revision;
	/* Extensions */
	/* Extensions */
	u8 extensions;
	u8 extensions;
	/* Create flags */
	u8 flags;
	/* Default timeout value, if enabled */
	/* Default timeout value, if enabled */
	u32 timeout;
	u32 timeout;
	/* Element data size */
	/* Element data size */
@@ -251,6 +256,8 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
		cadt_flags |= IPSET_FLAG_WITH_COUNTERS;
		cadt_flags |= IPSET_FLAG_WITH_COUNTERS;
	if (SET_WITH_COMMENT(set))
	if (SET_WITH_COMMENT(set))
		cadt_flags |= IPSET_FLAG_WITH_COMMENT;
		cadt_flags |= IPSET_FLAG_WITH_COMMENT;
	if (SET_WITH_FORCEADD(set))
		cadt_flags |= IPSET_FLAG_WITH_FORCEADD;


	if (!cadt_flags)
	if (!cadt_flags)
		return 0;
		return 0;
+21 −0
Original line number Original line Diff line number Diff line
@@ -44,6 +44,27 @@ int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid,


void nfnl_lock(__u8 subsys_id);
void nfnl_lock(__u8 subsys_id);
void nfnl_unlock(__u8 subsys_id);
void nfnl_unlock(__u8 subsys_id);
#ifdef CONFIG_PROVE_LOCKING
/* With CONFIG_PROVE_LOCKING only the prototype is provided here; the
 * definition lives outside this header.
 */
int lockdep_nfnl_is_held(__u8 subsys_id);
#else
/* Without CONFIG_PROVE_LOCKING this is a stub that ignores subsys_id and
 * always returns 1, so the condition nfnl_dereference() hands to
 * rcu_dereference_protected() is always true.
 */
static inline int lockdep_nfnl_is_held(__u8 subsys_id)
{
	return 1;
}
#endif /* CONFIG_PROVE_LOCKING */

/*
 * nfnl_dereference - fetch RCU pointer when updates are prevented by subsys mutex
 *
 * @p: The pointer to read, prior to dereferencing
 * @ss: The nfnetlink subsystem ID
 *
 * Return the value of the specified RCU-protected pointer, but omit
 * both the smp_read_barrier_depends() and the ACCESS_ONCE(), because
 * the caller holds the NFNL subsystem mutex.
 *
 * Expands to rcu_dereference_protected() with lockdep_nfnl_is_held(ss)
 * as the protection condition.
 */
#define nfnl_dereference(p, ss)					\
	rcu_dereference_protected(p, lockdep_nfnl_is_held(ss))


#define MODULE_ALIAS_NFNL_SUBSYS(subsys) \
#define MODULE_ALIAS_NFNL_SUBSYS(subsys) \
	MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys))
	MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys))
+9 −2
Original line number Original line Diff line number Diff line
@@ -73,10 +73,17 @@ struct nf_conn_help {


struct nf_conn {
struct nf_conn {
	/* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
	/* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
           plus 1 for any connection(s) we are `master' for */
	 * plus 1 for any connection(s) we are `master' for
	 *
	 * Hint, SKB address this struct and refcnt via skb->nfct and
	 * helpers nf_conntrack_get() and nf_conntrack_put().
	 * Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt,
	 * beware nf_ct_get() is different and don't inc refcnt.
	 */
	struct nf_conntrack ct_general;
	struct nf_conntrack ct_general;


	spinlock_t	lock;
	spinlock_t	lock;
	u16		cpu;


	/* XXX should I move this to the tail ? - Y.K */
	/* XXX should I move this to the tail ? - Y.K */
	/* These are my tuples; original and reply */
	/* These are my tuples; original and reply */
+8 −1
Original line number Original line Diff line number Diff line
@@ -77,6 +77,13 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
            const struct nf_conntrack_l3proto *l3proto,
            const struct nf_conntrack_l3proto *l3proto,
            const struct nf_conntrack_l4proto *proto);
            const struct nf_conntrack_l4proto *proto);


extern spinlock_t nf_conntrack_lock ;
/*
 * CONNTRACK_LOCKS is the number of entries in nf_conntrack_locks[]:
 * 8 when CONFIG_LOCKDEP is enabled, 1024 otherwise.
 */
#ifdef CONFIG_LOCKDEP
# define CONNTRACK_LOCKS 8
#else
# define CONNTRACK_LOCKS 1024
#endif
extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];

extern spinlock_t nf_conntrack_expect_lock;


#endif /* _NF_CONNTRACK_CORE_H */
#endif /* _NF_CONNTRACK_CORE_H */
+3 −1
Original line number Original line Diff line number Diff line
@@ -7,6 +7,8 @@


#include <uapi/linux/netfilter/xt_connlabel.h>
#include <uapi/linux/netfilter/xt_connlabel.h>


/* (XT_CONNLABEL_MAXBIT + 1) bits expressed in bytes (integer division by BITS_PER_BYTE). */
#define NF_CT_LABELS_MAX_SIZE ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE)

struct nf_conn_labels {
struct nf_conn_labels {
	u8 words;
	u8 words;
	unsigned long bits[];
	unsigned long bits[];
@@ -29,7 +31,7 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct)
	u8 words;
	u8 words;


	words = ACCESS_ONCE(net->ct.label_words);
	words = ACCESS_ONCE(net->ct.label_words);
	if (words == 0 || WARN_ON_ONCE(words > 8))
	if (words == 0)
		return NULL;
		return NULL;


	cl_ext = nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS,
	cl_ext = nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS,
Loading