Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 4cf411de authored by Patrick McHardy, committed by David S. Miller
Browse files

[NETFILTER]: Get rid of HW checksum invalidation



Update hardware checksums incrementally to avoid breaking GSO.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 84fa7933
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -282,6 +282,12 @@ extern void nf_invalidate_cache(int pf);
   Returns true or false. */
extern int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len);

/* Incremental checksum update helpers.
 *
 * In the callers visible in this change, 'oldval' is passed as the
 * bitwise complement of the value being replaced, 'newval' is the
 * replacement, and the returned value is stored back as the new
 * checksum field.
 *
 * NOTE(review): the definitions are not visible here.  'pseudohdr' is
 * passed as 1 when the TCP/UDP length is what changed and as 0 for
 * sequence/ack/SACK number rewrites -- confirm the exact semantics
 * (and how 'skb' is used) against the implementation. */
extern u_int16_t nf_csum_update(u_int32_t oldval, u_int32_t newval,
				u_int32_t csum);
extern u_int16_t nf_proto_csum_update(struct sk_buff *skb,
				      u_int32_t oldval, u_int32_t newval,
				      u_int16_t csum, int pseudohdr);

struct nf_afinfo {
	unsigned short	family;
	unsigned int	(*checksum)(struct sk_buff *skb, unsigned int hook,
+0 −4
Original line number Diff line number Diff line
@@ -72,10 +72,6 @@ extern unsigned int ip_nat_setup_info(struct ip_conntrack *conntrack,
extern int ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
			     const struct ip_conntrack *ignored_conntrack);

/* Calculate relative checksum. */
extern u_int16_t ip_nat_cheat_check(u_int32_t oldvalinv,
				    u_int32_t newval,
				    u_int16_t oldcheck);
#else  /* !__KERNEL__: iptables wants this to compile. */
#define ip_nat_multi_range ip_nat_multi_range_compat
#endif /*__KERNEL__*/
+4 −4
Original line number Diff line number Diff line
@@ -11,8 +11,8 @@ extern unsigned int ip_nat_packet(struct ip_conntrack *ct,
			       unsigned int hooknum,
			       struct sk_buff **pskb);

extern int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
					 struct ip_conntrack *ct,
					 enum ip_nat_manip_type manip,
					 enum ip_conntrack_dir dir);
extern int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
					 enum ip_conntrack_info ctinfo,
					 unsigned int hooknum,
					 struct sk_buff **pskb);
#endif /* _IP_NAT_CORE_H */
+20 −32
Original line number Diff line number Diff line
@@ -101,18 +101,6 @@ static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
	write_unlock_bh(&ip_nat_lock);
}

/* Patch an existing checksum instead of recomputing it: the
 * complemented old checksum seeds the sum, the inverted old value and
 * the new value are added on top, and the result is folded.
 * Since the sum is only adjusted, a checksum that was wrong before
 * stays wrong.  This also works for incomplete packets (eg. ICMP dest
 * unreachables.) */
u_int16_t
ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
{
	u_int32_t words[2];
	unsigned int seed = oldcheck ^ 0xFFFF;

	words[0] = oldvalinv;
	words[1] = newval;

	return csum_fold(csum_partial((char *)words, sizeof(words), seed));
}
EXPORT_SYMBOL(ip_nat_cheat_check);

/* Is this tuple already taken? (not by us) */
int
ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
@@ -378,11 +366,11 @@ manip_pkt(u_int16_t proto,
	iph = (void *)(*pskb)->data + iphdroff;

	if (maniptype == IP_NAT_MANIP_SRC) {
		iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip,
		iph->check = nf_csum_update(~iph->saddr, target->src.ip,
					    iph->check);
		iph->saddr = target->src.ip;
	} else {
		iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip,
		iph->check = nf_csum_update(~iph->daddr, target->dst.ip,
					    iph->check);
		iph->daddr = target->dst.ip;
	}
@@ -423,10 +411,10 @@ unsigned int ip_nat_packet(struct ip_conntrack *ct,
EXPORT_SYMBOL_GPL(ip_nat_packet);

/* Dir is direction ICMP is coming from (opposite to packet it contains) */
int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
				  struct ip_conntrack *ct,
				  enum ip_nat_manip_type manip,
				  enum ip_conntrack_dir dir)
int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
				  enum ip_conntrack_info ctinfo,
				  unsigned int hooknum,
				  struct sk_buff **pskb)
{
	struct {
		struct icmphdr icmp;
@@ -434,7 +422,9 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
	} *inside;
	struct ip_conntrack_tuple inner, target;
	int hdrlen = (*pskb)->nh.iph->ihl * 4;
	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
	unsigned long statusbit;
	enum ip_nat_manip_type manip = HOOK2MANIP(hooknum);

	if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
		return 0;
@@ -443,12 +433,8 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb,

	/* We're actually going to mangle it beyond trivial checksum
	   adjustment, so make sure the current checksum is correct. */
	if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) {
		hdrlen = (*pskb)->nh.iph->ihl * 4;
		if ((u16)csum_fold(skb_checksum(*pskb, hdrlen,
						(*pskb)->len - hdrlen, 0)))
	if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0))
		return 0;
	}

	/* Must be RELATED */
	IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
@@ -487,12 +473,14 @@ int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
		       !manip))
		return 0;

	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
		/* Reloading "inside" here since manip_pkt inner. */
		inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
		inside->icmp.checksum = 0;
		inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
							       (*pskb)->len - hdrlen,
							       0));
	}

	/* Change outer to look like the reply to an incoming packet
	 * (proto 0 means don't invert per-proto part). */
+41 −18
Original line number Diff line number Diff line
@@ -165,7 +165,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
{
	struct iphdr *iph;
	struct tcphdr *tcph;
	int datalen;
	int oldlen, datalen;

	if (!skb_make_writable(pskb, (*pskb)->len))
		return 0;
@@ -180,13 +180,22 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
	iph = (*pskb)->nh.iph;
	tcph = (void *)iph + iph->ihl*4;

	oldlen = (*pskb)->len - iph->ihl*4;
	mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
			match_offset, match_len, rep_buffer, rep_len);

	datalen = (*pskb)->len - iph->ihl*4;
	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
		tcph->check = 0;
	tcph->check = tcp_v4_check(tcph, datalen, iph->saddr, iph->daddr,
				   csum_partial((char *)tcph, datalen, 0));
		tcph->check = tcp_v4_check(tcph, datalen,
					   iph->saddr, iph->daddr,
					   csum_partial((char *)tcph,
					   		datalen, 0));
	} else
		tcph->check = nf_proto_csum_update(*pskb,
						   htons(oldlen) ^ 0xFFFF,
						   htons(datalen),
						   tcph->check, 1);

	if (rep_len != match_len) {
		set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
@@ -221,6 +230,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb,
{
	struct iphdr *iph;
	struct udphdr *udph;
	int datalen, oldlen;

	/* UDP helpers might accidentally mangle the wrong packet */
	iph = (*pskb)->nh.iph;
@@ -238,22 +248,32 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb,

	iph = (*pskb)->nh.iph;
	udph = (void *)iph + iph->ihl*4;

	oldlen = (*pskb)->len - iph->ihl*4;
	mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
			match_offset, match_len, rep_buffer, rep_len);

	/* update the length of the UDP packet */
	udph->len = htons((*pskb)->len - iph->ihl*4);
	datalen = (*pskb)->len - iph->ihl*4;
	udph->len = htons(datalen);

	/* fix udp checksum if udp checksum was previously calculated */
	if (udph->check) {
		int datalen = (*pskb)->len - iph->ihl * 4;
	if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL)
		return 1;

	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
		udph->check = 0;
		udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
		                                datalen, IPPROTO_UDP,
		                                csum_partial((char *)udph,
		                                             datalen, 0));
	}

		if (!udph->check)
			udph->check = -1;
	} else
		udph->check = nf_proto_csum_update(*pskb,
						   htons(oldlen) ^ 0xFFFF,
						   htons(datalen),
						   udph->check, 1);
	return 1;
}
EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
@@ -293,11 +313,14 @@ sack_adjust(struct sk_buff *skb,
			ntohl(sack->start_seq), new_start_seq,
			ntohl(sack->end_seq), new_end_seq);

		tcph->check = 
			ip_nat_cheat_check(~sack->start_seq, new_start_seq,
					   ip_nat_cheat_check(~sack->end_seq, 
		tcph->check = nf_proto_csum_update(skb,
						   ~sack->start_seq,
						   new_start_seq,
						   tcph->check, 0);
		tcph->check = nf_proto_csum_update(skb,
						   ~sack->end_seq,
						   new_end_seq,
							      tcph->check));
						   tcph->check, 0);
		sack->start_seq = new_start_seq;
		sack->end_seq = new_end_seq;
		sackoff += sizeof(*sack);
@@ -381,10 +404,10 @@ ip_nat_seq_adjust(struct sk_buff **pskb,
		newack = ntohl(tcph->ack_seq) - other_way->offset_before;
	newack = htonl(newack);

	tcph->check = ip_nat_cheat_check(~tcph->seq, newseq,
					 ip_nat_cheat_check(~tcph->ack_seq, 
					 		    newack, 
							    tcph->check));
	tcph->check = nf_proto_csum_update(*pskb, ~tcph->seq, newseq,
					   tcph->check, 0);
	tcph->check = nf_proto_csum_update(*pskb, ~tcph->ack_seq, newack,
					   tcph->check, 0);

	DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
		ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
Loading