Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ada6c1de authored by David S. Miller's avatar David S. Miller
Browse files


Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

This a bit large (and late) patchset that contains Netfilter updates for
net-next. Most relevantly br_netfilter fixes, ipset RCU support, removal of
x_tables percpu ruleset copy and rework of the nf_tables netdev support. More
specifically, they are:

1) Warn the user when there is a better protocol conntracker available, from
   Marcelo Ricardo Leitner.

2) Fix forwarding of IPv6 fragmented traffic in br_netfilter, from Bernhard
   Thaler. This comes with several patches to prepare the change in first place.

3) Get rid of special mtu handling of PPPoE/VLAN frames for br_netfilter. This
   is not needed anymore since now we use the largest fragment size to
   refragment, from Florian Westphal.

4) Restore vlan tag when refragmenting in br_netfilter, also from Florian.

5) Get rid of the percpu ruleset copy in x_tables, from Florian. Plus another
   follow up patch to refine it from Eric Dumazet.

6) Several ipset cleanups, fixes and finally RCU support, from Jozsef Kadlecsik.

7) Get rid of parens in Netfilter Kconfig files.

8) Attach the net_device to the basechain as opposed to the initial per table
   approach in the nf_tables netdev family.

9) Subscribe to netdev events to detect the removal and registration of a
   device that is referenced by a basechain.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 758f0d4b 835b8033
Loading
Loading
Loading
Loading
+17 −12
Original line number Diff line number Diff line
@@ -108,8 +108,13 @@ struct ip_set_counter {
	atomic64_t packets;
};

struct ip_set_comment_rcu {
	struct rcu_head rcu;
	char str[0];
};

struct ip_set_comment {
	char *str;
	struct ip_set_comment_rcu __rcu *c;
};

struct ip_set_skbinfo {
@@ -176,6 +181,9 @@ struct ip_set_type_variant {
	/* List elements */
	int (*list)(const struct ip_set *set, struct sk_buff *skb,
		    struct netlink_callback *cb);
	/* Keep listing private when resizing runs parallel */
	void (*uref)(struct ip_set *set, struct netlink_callback *cb,
		     bool start);

	/* Return true if "b" set is the same as "a"
	 * according to the create set parameters */
@@ -223,7 +231,7 @@ struct ip_set {
	/* The name of the set */
	char name[IPSET_MAXNAMELEN];
	/* Lock protecting the set data */
	rwlock_t lock;
	spinlock_t lock;
	/* References to the set */
	u32 ref;
	/* The core set type */
@@ -346,7 +354,6 @@ ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo)
	       (skbinfo->skbqueue &&
		nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
			     cpu_to_be16(skbinfo->skbqueue)));

}

static inline void
@@ -380,12 +387,12 @@ ip_set_init_counter(struct ip_set_counter *counter,

/* Netlink CB args */
enum {
	IPSET_CB_NET = 0,
	IPSET_CB_DUMP,
	IPSET_CB_INDEX,
	IPSET_CB_ARG0,
	IPSET_CB_NET = 0,	/* net namespace */
	IPSET_CB_DUMP,		/* dump single set/all sets */
	IPSET_CB_INDEX,		/* set index */
	IPSET_CB_PRIVATE,	/* set private data */
	IPSET_CB_ARG0,		/* type specific */
	IPSET_CB_ARG1,
	IPSET_CB_ARG2,
};

/* register and unregister set references */
@@ -545,8 +552,6 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
	{ .bytes = ULLONG_MAX, .packets = ULLONG_MAX,	\
	  .timeout = (set)->timeout }

#define IP_SET_INIT_CIDR(a, b) ((a) ? (a) : (b))

#define IPSET_CONCAT(a, b)		a##b
#define IPSET_TOKEN(a, b)		IPSET_CONCAT(a, b)

+27 −11
Original line number Diff line number Diff line
@@ -16,41 +16,57 @@ ip_set_comment_uget(struct nlattr *tb)
	return nla_data(tb);
}

/* Called from uadd only, protected by the set spinlock.
 * The kadt functions don't use the comment extensions in any way.
 */
static inline void
ip_set_init_comment(struct ip_set_comment *comment,
		    const struct ip_set_ext *ext)
{
	struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
	size_t len = ext->comment ? strlen(ext->comment) : 0;

	if (unlikely(comment->str)) {
		kfree(comment->str);
		comment->str = NULL;
	if (unlikely(c)) {
		kfree_rcu(c, rcu);
		rcu_assign_pointer(comment->c, NULL);
	}
	if (!len)
		return;
	if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
		len = IPSET_MAX_COMMENT_SIZE;
	comment->str = kzalloc(len + 1, GFP_ATOMIC);
	if (unlikely(!comment->str))
	c = kzalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
	if (unlikely(!c))
		return;
	strlcpy(comment->str, ext->comment, len + 1);
	strlcpy(c->str, ext->comment, len + 1);
	rcu_assign_pointer(comment->c, c);
}

/* Used only when dumping a set, protected by rcu_read_lock_bh() */
static inline int
ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment)
{
	if (!comment->str)
	struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c);

	if (!c)
		return 0;
	return nla_put_string(skb, IPSET_ATTR_COMMENT, comment->str);
	return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str);
}

/* Called from uadd/udel, flush or the garbage collectors protected
 * by the set spinlock.
 * Called when the set is destroyed and when there can't be any user
 * of the set data anymore.
 */
static inline void
ip_set_comment_free(struct ip_set_comment *comment)
{
	if (unlikely(!comment->str))
	struct ip_set_comment_rcu *c;

	c = rcu_dereference_protected(comment->c, 1);
	if (unlikely(!c))
		return;
	kfree(comment->str);
	comment->str = NULL;
	kfree_rcu(c, rcu);
	rcu_assign_pointer(comment->c, NULL);
}

#endif
+11 −16
Original line number Diff line number Diff line
@@ -40,38 +40,33 @@ ip_set_timeout_uget(struct nlattr *tb)
}

static inline bool
ip_set_timeout_test(unsigned long timeout)
ip_set_timeout_expired(unsigned long *t)
{
	return timeout == IPSET_ELEM_PERMANENT ||
	       time_is_after_jiffies(timeout);
}

static inline bool
ip_set_timeout_expired(unsigned long *timeout)
{
	return *timeout != IPSET_ELEM_PERMANENT &&
	       time_is_before_jiffies(*timeout);
	return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t);
}

static inline void
ip_set_timeout_set(unsigned long *timeout, u32 t)
ip_set_timeout_set(unsigned long *timeout, u32 value)
{
	if (!t) {
	unsigned long t;

	if (!value) {
		*timeout = IPSET_ELEM_PERMANENT;
		return;
	}

	*timeout = msecs_to_jiffies(t * 1000) + jiffies;
	if (*timeout == IPSET_ELEM_PERMANENT)
	t = msecs_to_jiffies(value * MSEC_PER_SEC) + jiffies;
	if (t == IPSET_ELEM_PERMANENT)
		/* Bingo! :-) */
		(*timeout)--;
		t--;
	*timeout = t;
}

static inline u32
ip_set_timeout_get(unsigned long *timeout)
{
	return *timeout == IPSET_ELEM_PERMANENT ? 0 :
		jiffies_to_msecs(*timeout - jiffies)/1000;
		jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC;
}

#endif	/* __KERNEL__ */
+51 −5
Original line number Diff line number Diff line
@@ -224,13 +224,10 @@ struct xt_table_info {
	unsigned int stacksize;
	unsigned int __percpu *stackptr;
	void ***jumpstack;
	/* ipt_entry tables: one per CPU */
	/* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
	void *entries[1];

	unsigned char entries[0] __aligned(8);
};

#define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \
			  + nr_cpu_ids * sizeof(char *))
int xt_register_target(struct xt_target *target);
void xt_unregister_target(struct xt_target *target);
int xt_register_targets(struct xt_target *target, unsigned int n);
@@ -353,6 +350,55 @@ static inline unsigned long ifname_compare_aligned(const char *_a,
	return ret;
}


/* On SMP, ip(6)t_entry->counters.pcnt holds address of the
 * real (percpu) counter.  On !SMP, its just the packet count,
 * so nothing needs to be done there.
 *
 * xt_percpu_counter_alloc returns the address of the percpu
 * counter, or 0 on !SMP.
 *
 * Hence caller must use IS_ERR_VALUE to check for error, this
 * allows us to return 0 for single core systems without forcing
 * callers to deal with SMP vs. NONSMP issues.
 */
static inline u64 xt_percpu_counter_alloc(void)
{
	if (nr_cpu_ids > 1) {
		void __percpu *res = alloc_percpu(struct xt_counters);

		if (res == NULL)
			return (u64) -ENOMEM;

		return (__force u64) res;
	}

	return 0;
}
static inline void xt_percpu_counter_free(u64 pcnt)
{
	if (nr_cpu_ids > 1)
		free_percpu((void __percpu *) pcnt);
}

static inline struct xt_counters *
xt_get_this_cpu_counter(struct xt_counters *cnt)
{
	if (nr_cpu_ids > 1)
		return this_cpu_ptr((void __percpu *) cnt->pcnt);

	return cnt;
}

static inline struct xt_counters *
xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu)
{
	if (nr_cpu_ids > 1)
		return per_cpu_ptr((void __percpu *) cnt->pcnt, cpu);

	return cnt;
}

struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *);
void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *);

+0 −7
Original line number Diff line number Diff line
@@ -20,13 +20,6 @@ enum nf_br_hook_priorities {
#define BRNF_BRIDGED_DNAT		0x02
#define BRNF_NF_BRIDGE_PREROUTING	0x08

static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
{
	if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE)
		return PPPOE_SES_HLEN;
	return 0;
}

int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb);

static inline void br_drop_fake_rtable(struct sk_buff *skb)
Loading