Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 85224844 authored by David S. Miller's avatar David S. Miller
Browse files


Pablo Neira Ayuso says:

====================
pull request: netfilter/ipvs updates for net-next

The following patchset contains Netfilter/IPVS updates for net-next,
most relevantly they are:

1) Four patches to make the new nf_tables masquerading support
   independent of the x_tables infrastructure. This also resolves a
   compilation breakage if the masquerade target is disabled but the
   nf_tables masq expression is enabled.

2) ipset updates via Jozsef Kadlecsik. This includes the addition of the
   skbinfo extension that allows you to store packet metainformation in the
   elements. This can be used to fetch and restore this to the packets through
   the iptables SET target, patches from Anton Danilov.

3) Add the hash:mac set type to ipset, from Jozsef Kadlecsick.

4) Add simple weighted fail-over scheduler via Simon Horman. This provides
   a fail-over IPVS scheduler (unlike existing load balancing schedulers).
   Connections are directed to the appropriate server based solely on
   highest weight value and server availability, patch from Kenny Mathis.

5) Support IPv6 real servers in IPv4 virtual-services and vice versa.
   Simon Horman informs that the motivation for this is to allow more
   flexibility in the choice of IP version offered by both virtual-servers
   and real-servers as they no longer need to match: An IPv4 connection
   from an end-user may be forwarded to a real-server using IPv6 and
   vice versa. No ip_vs_sync support yet though. Patches from Alex Gartrell
   and Julian Anastasov.

6) Add global generation ID to the nf_tables ruleset. When dumping from
   several different object lists, we need a way to identify that an update
   has ocurred so userspace knows that it needs to refresh its lists. This
   also includes a new command to obtain the 32-bits generation ID. The
   less significant 16-bits of this ID is also exposed through res_id field
   in the nfnetlink header to quickly detect the interference and retry when
   there is no risk of ID wraparound.

7) Move br_netfilter out of the bridge core. The br_netfilter code is
   built in the bridge core by default. This causes problems of different
   kind to people that don't want this: Jesper reported performance drop due
   to the inconditional hook registration and I remember to have read complains
   on netdev from people regarding the unexpected behaviour of our bridging
   stack when br_netfilter is enabled (fragmentation handling, layer 3 and
   upper inspection). People that still need this should easily undo the
   damage by modprobing the new br_netfilter module.

8) Dump the set policy nf_tables that allows set parameterization. So
   userspace can keep user-defined preferences when saving the ruleset.
   From Arturo Borrero.

9) Use __seq_open_private() helper function to reduce boiler plate code
   in x_tables, From Rob Jones.

10) Safer default behaviour in case that you forget to load the protocol
   tracker. Daniel Borkmann and Florian Westphal detected that if your
   ruleset is stateful, you allow traffic to at least one single SCTP port
   and the SCTP protocol tracker is not loaded, then any SCTP traffic may
   be pass through unfiltered. After this patch, the connection tracking
   classifies SCTP/DCCP/UDPlite/GRE packets as invalid if your kernel has
   been compiled with support for these modules.
====================

Trivially resolved conflict in include/linux/skbuff.h, Eric moved some
netfilter skbuff members around, and the netfilter tree adjusted the
ifdef guards for the bridging info pointer.

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 735d3831 db29a950
Loading
Loading
Loading
Loading
+59 −1
Original line number Diff line number Diff line
@@ -57,6 +57,8 @@ enum ip_set_extension {
	IPSET_EXT_COUNTER = (1 << IPSET_EXT_BIT_COUNTER),
	IPSET_EXT_BIT_COMMENT = 2,
	IPSET_EXT_COMMENT = (1 << IPSET_EXT_BIT_COMMENT),
	IPSET_EXT_BIT_SKBINFO = 3,
	IPSET_EXT_SKBINFO = (1 << IPSET_EXT_BIT_SKBINFO),
	/* Mark set with an extension which needs to call destroy */
	IPSET_EXT_BIT_DESTROY = 7,
	IPSET_EXT_DESTROY = (1 << IPSET_EXT_BIT_DESTROY),
@@ -65,12 +67,14 @@ enum ip_set_extension {
#define SET_WITH_TIMEOUT(s)	((s)->extensions & IPSET_EXT_TIMEOUT)
#define SET_WITH_COUNTER(s)	((s)->extensions & IPSET_EXT_COUNTER)
#define SET_WITH_COMMENT(s)	((s)->extensions & IPSET_EXT_COMMENT)
#define SET_WITH_SKBINFO(s)	((s)->extensions & IPSET_EXT_SKBINFO)
#define SET_WITH_FORCEADD(s)	((s)->flags & IPSET_CREATE_FLAG_FORCEADD)

/* Extension id, in size order */
enum ip_set_ext_id {
	IPSET_EXT_ID_COUNTER = 0,
	IPSET_EXT_ID_TIMEOUT,
	IPSET_EXT_ID_SKBINFO,
	IPSET_EXT_ID_COMMENT,
	IPSET_EXT_ID_MAX,
};
@@ -92,6 +96,10 @@ struct ip_set_ext {
	u64 packets;
	u64 bytes;
	u32 timeout;
	u32 skbmark;
	u32 skbmarkmask;
	u32 skbprio;
	u16 skbqueue;
	char *comment;
};

@@ -104,6 +112,13 @@ struct ip_set_comment {
	char *str;
};

struct ip_set_skbinfo {
	u32 skbmark;
	u32 skbmarkmask;
	u32 skbprio;
	u16 skbqueue;
};

struct ip_set;

#define ext_timeout(e, s)	\
@@ -112,7 +127,8 @@ struct ip_set;
(struct ip_set_counter *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COUNTER])
#define ext_comment(e, s)	\
(struct ip_set_comment *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_COMMENT])

#define ext_skbinfo(e, s)	\
(struct ip_set_skbinfo *)(((void *)(e)) + (s)->offset[IPSET_EXT_ID_SKBINFO])

typedef int (*ipset_adtfn)(struct ip_set *set, void *value,
			   const struct ip_set_ext *ext,
@@ -256,6 +272,8 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
		cadt_flags |= IPSET_FLAG_WITH_COUNTERS;
	if (SET_WITH_COMMENT(set))
		cadt_flags |= IPSET_FLAG_WITH_COMMENT;
	if (SET_WITH_SKBINFO(set))
		cadt_flags |= IPSET_FLAG_WITH_SKBINFO;
	if (SET_WITH_FORCEADD(set))
		cadt_flags |= IPSET_FLAG_WITH_FORCEADD;

@@ -304,6 +322,43 @@ ip_set_update_counter(struct ip_set_counter *counter,
	}
}

static inline void
ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
		      const struct ip_set_ext *ext,
		      struct ip_set_ext *mext, u32 flags)
{
		mext->skbmark = skbinfo->skbmark;
		mext->skbmarkmask = skbinfo->skbmarkmask;
		mext->skbprio = skbinfo->skbprio;
		mext->skbqueue = skbinfo->skbqueue;
}
static inline bool
ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo)
{
	/* Send nonzero parameters only */
	return ((skbinfo->skbmark || skbinfo->skbmarkmask) &&
		nla_put_net64(skb, IPSET_ATTR_SKBMARK,
			      cpu_to_be64((u64)skbinfo->skbmark << 32 |
					  skbinfo->skbmarkmask))) ||
	       (skbinfo->skbprio &&
	        nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
			      cpu_to_be32(skbinfo->skbprio))) ||
	       (skbinfo->skbqueue &&
	        nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
			     cpu_to_be16(skbinfo->skbqueue)));

}

static inline void
ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo,
		    const struct ip_set_ext *ext)
{
	skbinfo->skbmark = ext->skbmark;
	skbinfo->skbmarkmask = ext->skbmarkmask;
	skbinfo->skbprio = ext->skbprio;
	skbinfo->skbqueue = ext->skbqueue;
}

static inline bool
ip_set_put_counter(struct sk_buff *skb, struct ip_set_counter *counter)
{
@@ -497,6 +552,9 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
	if (SET_WITH_COMMENT(set) &&
	    ip_set_put_comment(skb, ext_comment(e, set)))
		return -EMSGSIZE;
	if (SET_WITH_SKBINFO(set) &&
	    ip_set_put_skbinfo(skb, ext_skbinfo(e, set)))
		return -EMSGSIZE;
	return 0;
}

+39 −11
Original line number Diff line number Diff line
@@ -15,7 +15,7 @@ enum nf_br_hook_priorities {
	NF_BR_PRI_LAST = INT_MAX,
};

#ifdef CONFIG_BRIDGE_NETFILTER
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)

#define BRNF_PKT_TYPE			0x01
#define BRNF_BRIDGED_DNAT		0x02
@@ -24,16 +24,6 @@ enum nf_br_hook_priorities {
#define BRNF_8021Q			0x10
#define BRNF_PPPoE			0x20

/* Only used in br_forward.c */
int nf_bridge_copy_header(struct sk_buff *skb);
static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb)
{
	if (skb->nf_bridge &&
	    skb->nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT))
		return nf_bridge_copy_header(skb);
  	return 0;
}

static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
{
	switch (skb->protocol) {
@@ -46,6 +36,44 @@ static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
	}
}

static inline void nf_bridge_update_protocol(struct sk_buff *skb)
{
	if (skb->nf_bridge->mask & BRNF_8021Q)
		skb->protocol = htons(ETH_P_8021Q);
	else if (skb->nf_bridge->mask & BRNF_PPPoE)
		skb->protocol = htons(ETH_P_PPP_SES);
}

/* Fill in the header for fragmented IP packets handled by
 * the IPv4 connection tracking code.
 *
 * Only used in br_forward.c
 */
static inline int nf_bridge_copy_header(struct sk_buff *skb)
{
	int err;
	unsigned int header_size;

	nf_bridge_update_protocol(skb);
	header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
	err = skb_cow_head(skb, header_size);
	if (err)
		return err;

	skb_copy_to_linear_data_offset(skb, -header_size,
				       skb->nf_bridge->data, header_size);
	__skb_push(skb, nf_bridge_encap_header_len(skb));
	return 0;
}

static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb)
{
	if (skb->nf_bridge &&
	    skb->nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT))
		return nf_bridge_copy_header(skb);
  	return 0;
}

static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
{
	if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE))
+6 −8
Original line number Diff line number Diff line
@@ -156,7 +156,7 @@ struct nf_conntrack {
};
#endif

#ifdef CONFIG_BRIDGE_NETFILTER
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
struct nf_bridge_info {
	atomic_t		use;
	unsigned int		mask;
@@ -534,7 +534,7 @@ struct sk_buff {
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	struct nf_conntrack	*nfct;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
	struct nf_bridge_info	*nf_bridge;
#endif
	unsigned int		len,
@@ -556,8 +556,6 @@ struct sk_buff {
	/* one bit hole */
	kmemcheck_bitfield_end(flags1);



	/* fields enclosed in headers_start/headers_end are copied
	 * using a single memcpy() in __copy_skb_header()
	 */
@@ -3016,7 +3014,7 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
		atomic_inc(&nfct->use);
}
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
{
	if (nf_bridge && atomic_dec_and_test(&nf_bridge->use))
@@ -3034,7 +3032,7 @@ static inline void nf_reset(struct sk_buff *skb)
	nf_conntrack_put(skb->nfct);
	skb->nfct = NULL;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
	nf_bridge_put(skb->nf_bridge);
	skb->nf_bridge = NULL;
#endif
@@ -3057,7 +3055,7 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src,
	if (copy)
		dst->nfctinfo = src->nfctinfo;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
	dst->nf_bridge  = src->nf_bridge;
	nf_bridge_get(src->nf_bridge);
#endif
@@ -3072,7 +3070,7 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	nf_conntrack_put(dst->nfct);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
	nf_bridge_put(dst->nf_bridge);
#endif
	__nf_copy(dst, src, true);
+12 −3
Original line number Diff line number Diff line
@@ -535,6 +535,7 @@ struct ip_vs_conn {
	union nf_inet_addr      daddr;          /* destination address */
	volatile __u32          flags;          /* status flags */
	__u16                   protocol;       /* Which protocol (TCP/UDP) */
	__u16			daf;		/* Address family of the dest */
#ifdef CONFIG_NET_NS
	struct net              *net;           /* Name space */
#endif
@@ -648,6 +649,9 @@ struct ip_vs_dest_user_kern {
	/* thresholds for active connections */
	u32			u_threshold;	/* upper threshold */
	u32			l_threshold;	/* lower threshold */

	/* Address family of addr */
	u16			af;
};


@@ -986,6 +990,10 @@ struct netns_ipvs {
	char			backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
	/* net name space ptr */
	struct net		*net;            /* Needed by timer routines */
	/* Number of heterogeneous destinations, needed because
	 * heterogeneous are not supported when synchronization is
	 * enabled */
	unsigned int		mixed_address_family_dests;
};

#define DEFAULT_SYNC_THRESHOLD	3
@@ -1210,7 +1218,7 @@ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
void ip_vs_conn_put(struct ip_vs_conn *cp);
void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);

struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p,
struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
				  const union nf_inet_addr *daddr,
				  __be16 dport, unsigned int flags,
				  struct ip_vs_dest *dest, __u32 fwmark);
@@ -1396,8 +1404,9 @@ void ip_vs_unregister_nl_ioctl(void);
int ip_vs_control_init(void);
void ip_vs_control_cleanup(void);
struct ip_vs_dest *
ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr,
		__be16 dport, const union nf_inet_addr *vaddr, __be16 vport,
ip_vs_find_dest(struct net *net, int svc_af, int dest_af,
		const union nf_inet_addr *daddr, __be16 dport,
		const union nf_inet_addr *vaddr, __be16 vport,
		__u16 protocol, __u32 fwmark, __u32 flags);
void ip_vs_try_bind_dest(struct ip_vs_conn *cp);

+1 −1
Original line number Diff line number Diff line
@@ -373,7 +373,7 @@ static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
	return 0;
}

#ifdef CONFIG_BRIDGE_NETFILTER
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb)
{
	unsigned int seq, hh_alen;
Loading