Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 6aafeef0 authored by Jiri Pirko, committed by David S. Miller
Browse files

netfilter: push reasm skb through instead of original frag skbs



Pushing the original fragments through causes several problems. For
example, when matching, frags may not be matched correctly. Take the
following example:

<example>
On HOSTA do:
ip6tables -I INPUT -p icmpv6 -j DROP
ip6tables -I INPUT -p icmpv6 -m icmp6 --icmpv6-type 128 -j ACCEPT

and on HOSTB you do:
ping6 HOSTA -s2000    (MTU is 1500)

Incoming echo requests will be filtered out on HOSTA. This issue does not
occur with packets smaller than the MTU (where fragmentation does not happen).
</example>

As was discussed previously, the only correct solution seems to be to use
the reassembled skb instead of separate frags. Doing this has the positive
side effect of reducing sk_buff by one pointer (nfct_reasm), and the reasm
dances in ipvs and conntrack can also be removed.

Future plan is to remove net/ipv6/netfilter/nf_conntrack_reasm.c
entirely and use code in net/ipv6/reassembly.c instead.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Marcelo Ricardo Leitner <mleitner@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 9037c357
Loading
Loading
Loading
Loading
+0 −32
Original line number Original line Diff line number Diff line
@@ -337,11 +337,6 @@ typedef unsigned int sk_buff_data_t;
typedef unsigned char *sk_buff_data_t;
typedef unsigned char *sk_buff_data_t;
#endif
#endif


#if defined(CONFIG_NF_DEFRAG_IPV4) || defined(CONFIG_NF_DEFRAG_IPV4_MODULE) || \
    defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE)
#define NET_SKBUFF_NF_DEFRAG_NEEDED 1
#endif

/** 
/** 
 *	struct sk_buff - socket buffer
 *	struct sk_buff - socket buffer
 *	@next: Next buffer in list
 *	@next: Next buffer in list
@@ -374,7 +369,6 @@ typedef unsigned char *sk_buff_data_t;
 *	@protocol: Packet protocol from driver
 *	@protocol: Packet protocol from driver
 *	@destructor: Destruct function
 *	@destructor: Destruct function
 *	@nfct: Associated connection, if any
 *	@nfct: Associated connection, if any
 *	@nfct_reasm: netfilter conntrack re-assembly pointer
 *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
 *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
 *	@skb_iif: ifindex of device we arrived on
 *	@skb_iif: ifindex of device we arrived on
 *	@tc_index: Traffic control index
 *	@tc_index: Traffic control index
@@ -463,9 +457,6 @@ struct sk_buff {
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	struct nf_conntrack	*nfct;
	struct nf_conntrack	*nfct;
#endif
#endif
#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
	struct sk_buff		*nfct_reasm;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
#ifdef CONFIG_BRIDGE_NETFILTER
	struct nf_bridge_info	*nf_bridge;
	struct nf_bridge_info	*nf_bridge;
#endif
#endif
@@ -2595,18 +2586,6 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
		atomic_inc(&nfct->use);
		atomic_inc(&nfct->use);
}
}
#endif
#endif
#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
static inline void nf_conntrack_get_reasm(struct sk_buff *skb)
{
	if (skb)
		atomic_inc(&skb->users);
}
static inline void nf_conntrack_put_reasm(struct sk_buff *skb)
{
	if (skb)
		kfree_skb(skb);
}
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
#ifdef CONFIG_BRIDGE_NETFILTER
static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
{
{
@@ -2625,10 +2604,6 @@ static inline void nf_reset(struct sk_buff *skb)
	nf_conntrack_put(skb->nfct);
	nf_conntrack_put(skb->nfct);
	skb->nfct = NULL;
	skb->nfct = NULL;
#endif
#endif
#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
	nf_conntrack_put_reasm(skb->nfct_reasm);
	skb->nfct_reasm = NULL;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
#ifdef CONFIG_BRIDGE_NETFILTER
	nf_bridge_put(skb->nf_bridge);
	nf_bridge_put(skb->nf_bridge);
	skb->nf_bridge = NULL;
	skb->nf_bridge = NULL;
@@ -2650,10 +2625,6 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
	nf_conntrack_get(src->nfct);
	nf_conntrack_get(src->nfct);
	dst->nfctinfo = src->nfctinfo;
	dst->nfctinfo = src->nfctinfo;
#endif
#endif
#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
	dst->nfct_reasm = src->nfct_reasm;
	nf_conntrack_get_reasm(src->nfct_reasm);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
#ifdef CONFIG_BRIDGE_NETFILTER
	dst->nf_bridge  = src->nf_bridge;
	dst->nf_bridge  = src->nf_bridge;
	nf_bridge_get(src->nf_bridge);
	nf_bridge_get(src->nf_bridge);
@@ -2665,9 +2636,6 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	nf_conntrack_put(dst->nfct);
	nf_conntrack_put(dst->nfct);
#endif
#endif
#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
	nf_conntrack_put_reasm(dst->nfct_reasm);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
#ifdef CONFIG_BRIDGE_NETFILTER
	nf_bridge_put(dst->nf_bridge);
	nf_bridge_put(dst->nf_bridge);
#endif
#endif
+1 −31
Original line number Original line Diff line number Diff line
@@ -109,7 +109,6 @@ extern int ip_vs_conn_tab_size;
struct ip_vs_iphdr {
struct ip_vs_iphdr {
	__u32 len;	/* IPv4 simply where L4 starts
	__u32 len;	/* IPv4 simply where L4 starts
			   IPv6 where L4 Transport Header starts */
			   IPv6 where L4 Transport Header starts */
	__u32 thoff_reasm; /* Transport Header Offset in nfct_reasm skb */
	__u16 fragoffs; /* IPv6 fragment offset, 0 if first frag (or not frag)*/
	__u16 fragoffs; /* IPv6 fragment offset, 0 if first frag (or not frag)*/
	__s16 protocol;
	__s16 protocol;
	__s32 flags;
	__s32 flags;
@@ -117,34 +116,12 @@ struct ip_vs_iphdr {
	union nf_inet_addr daddr;
	union nf_inet_addr daddr;
};
};


/* Dependency to module: nf_defrag_ipv6 */
#if defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE)
static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb)
{
	return skb->nfct_reasm;
}
static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset,
				      int len, void *buffer,
				      const struct ip_vs_iphdr *ipvsh)
{
	if (unlikely(ipvsh->fragoffs && skb_nfct_reasm(skb)))
		return skb_header_pointer(skb_nfct_reasm(skb),
					  ipvsh->thoff_reasm, len, buffer);

	return skb_header_pointer(skb, offset, len, buffer);
}
#else
static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb)
{
	return NULL;
}
static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset,
static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset,
				      int len, void *buffer,
				      int len, void *buffer,
				      const struct ip_vs_iphdr *ipvsh)
				      const struct ip_vs_iphdr *ipvsh)
{
{
	return skb_header_pointer(skb, offset, len, buffer);
	return skb_header_pointer(skb, offset, len, buffer);
}
}
#endif


static inline void
static inline void
ip_vs_fill_ip4hdr(const void *nh, struct ip_vs_iphdr *iphdr)
ip_vs_fill_ip4hdr(const void *nh, struct ip_vs_iphdr *iphdr)
@@ -171,19 +148,12 @@ ip_vs_fill_iph_skb(int af, const struct sk_buff *skb, struct ip_vs_iphdr *iphdr)
			(struct ipv6hdr *)skb_network_header(skb);
			(struct ipv6hdr *)skb_network_header(skb);
		iphdr->saddr.in6 = iph->saddr;
		iphdr->saddr.in6 = iph->saddr;
		iphdr->daddr.in6 = iph->daddr;
		iphdr->daddr.in6 = iph->daddr;
		/* ipv6_find_hdr() updates len, flags, thoff_reasm */
		/* ipv6_find_hdr() updates len, flags */
		iphdr->thoff_reasm = 0;
		iphdr->len	 = 0;
		iphdr->len	 = 0;
		iphdr->flags	 = 0;
		iphdr->flags	 = 0;
		iphdr->protocol  = ipv6_find_hdr(skb, &iphdr->len, -1,
		iphdr->protocol  = ipv6_find_hdr(skb, &iphdr->len, -1,
						 &iphdr->fragoffs,
						 &iphdr->fragoffs,
						 &iphdr->flags);
						 &iphdr->flags);
		/* get proto from re-assembled packet and it's offset */
		if (skb_nfct_reasm(skb))
			iphdr->protocol = ipv6_find_hdr(skb_nfct_reasm(skb),
							&iphdr->thoff_reasm,
							-1, NULL, NULL);

	} else
	} else
#endif
#endif
	{
	{
+1 −3
Original line number Original line Diff line number Diff line
@@ -6,9 +6,7 @@ void nf_defrag_ipv6_enable(void);
int nf_ct_frag6_init(void);
int nf_ct_frag6_init(void);
void nf_ct_frag6_cleanup(void);
void nf_ct_frag6_cleanup(void);
struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user);
struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user);
void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
void nf_ct_frag6_consume_orig(struct sk_buff *skb);
			struct net_device *in, struct net_device *out,
			int (*okfn)(struct sk_buff *));


struct inet_frags_ctl;
struct inet_frags_ctl;


+0 −3
Original line number Original line Diff line number Diff line
@@ -592,9 +592,6 @@ static void skb_release_head_state(struct sk_buff *skb)
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	nf_conntrack_put(skb->nfct);
	nf_conntrack_put(skb->nfct);
#endif
#endif
#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
	nf_conntrack_put_reasm(skb->nfct_reasm);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
#ifdef CONFIG_BRIDGE_NETFILTER
	nf_bridge_put(skb->nf_bridge);
	nf_bridge_put(skb->nf_bridge);
#endif
#endif
+2 −54
Original line number Original line Diff line number Diff line
@@ -169,64 +169,13 @@ static unsigned int ipv6_confirm(const struct nf_hook_ops *ops,
	return nf_conntrack_confirm(skb);
	return nf_conntrack_confirm(skb);
}
}


static unsigned int __ipv6_conntrack_in(struct net *net,
					unsigned int hooknum,
					struct sk_buff *skb,
					const struct net_device *in,
					const struct net_device *out,
					int (*okfn)(struct sk_buff *))
{
	struct sk_buff *reasm = skb->nfct_reasm;
	const struct nf_conn_help *help;
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;

	/* This packet is fragmented and has reassembled packet. */
	if (reasm) {
		/* Reassembled packet isn't parsed yet ? */
		if (!reasm->nfct) {
			unsigned int ret;

			ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm);
			if (ret != NF_ACCEPT)
				return ret;
		}

		/* Conntrack helpers need the entire reassembled packet in the
		 * POST_ROUTING hook. In case of unconfirmed connections NAT
		 * might reassign a helper, so the entire packet is also
		 * required.
		 */
		ct = nf_ct_get(reasm, &ctinfo);
		if (ct != NULL && !nf_ct_is_untracked(ct)) {
			help = nfct_help(ct);
			if ((help && help->helper) || !nf_ct_is_confirmed(ct)) {
				nf_conntrack_get_reasm(reasm);
				NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm,
					       (struct net_device *)in,
					       (struct net_device *)out,
					       okfn, NF_IP6_PRI_CONNTRACK + 1);
				return NF_DROP_ERR(-ECANCELED);
			}
		}

		nf_conntrack_get(reasm->nfct);
		skb->nfct = reasm->nfct;
		skb->nfctinfo = reasm->nfctinfo;
		return NF_ACCEPT;
	}

	return nf_conntrack_in(net, PF_INET6, hooknum, skb);
}

static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops,
static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops,
				      struct sk_buff *skb,
				      struct sk_buff *skb,
				      const struct net_device *in,
				      const struct net_device *in,
				      const struct net_device *out,
				      const struct net_device *out,
				      int (*okfn)(struct sk_buff *))
				      int (*okfn)(struct sk_buff *))
{
{
	return __ipv6_conntrack_in(dev_net(in), ops->hooknum, skb, in, out,
	return nf_conntrack_in(dev_net(in), PF_INET6, ops->hooknum, skb);
				   okfn);
}
}


static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,
static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,
@@ -240,8 +189,7 @@ static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,
		net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
		net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
		return NF_ACCEPT;
		return NF_ACCEPT;
	}
	}
	return __ipv6_conntrack_in(dev_net(out), ops->hooknum, skb, in, out,
	return nf_conntrack_in(dev_net(out), PF_INET6, ops->hooknum, skb);
				   okfn);
}
}


static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
Loading