
Commit 73f156a6 authored by Eric Dumazet, committed by David S. Miller

inetpeer: get rid of ip_id_count



Ideally, we would generate the IP ID using a per-destination-IP
generator.

Linux kernels used the inet_peer cache for this purpose, but this has a
huge cost on servers that disable MTU discovery:

1) each inet_peer struct consumes 192 bytes

2) inetpeer cache uses a binary tree of inet_peer structs,
   with a nominal size of ~66000 elements under load.

3) lookups in this tree hit a lot of cache lines, as the tree depth
   is about 20.

4) If the server handles many TCP flows, there is a high probability of
   not finding the inet_peer, allocating a fresh one and inserting it
   into the tree with the same initial ip_id_count (cf. secure_ip_id()).

5) We garbage collect inet_peer aggressively.

IP ID generation does not have to be 'perfect'.

The goal is to avoid duplicates within a short period of time, so that
reassembly units have a chance to complete the reassembly of fragments
belonging to one message before other fragments arrive with a recycled
ID.

We simply use an array of ID generators, indexed by a Jenkins hash of
the destination IP.
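
For illustration only, a minimal userspace-style sketch of that scheme follows;
IP_ID_SLOTS, toy_hash() and every other name below are invented stand-ins for
the real ip_idents[] array, IP_IDENTS_SZ and the keyed Jenkins hash, while the
reservation contract mirrors the ip_idents_reserve() helper added in the header
hunk further down.

/* Userspace sketch of the "array of generators" idea; NOT the kernel code. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define IP_ID_SLOTS 2048u			/* stand-in for IP_IDENTS_SZ */

static atomic_uint ip_id_slots[IP_ID_SLOTS];	/* one ID generator per slot */

/* Toy mix of the destination address with a secret; the kernel uses a
 * Jenkins hash (jhash) here instead.
 */
static uint32_t toy_hash(uint32_t daddr, uint32_t secret)
{
	uint32_t h = daddr ^ secret;

	h ^= h >> 16;
	h *= 0x7feb352du;
	h ^= h >> 15;
	return h;
}

/* Reserve 'segs' consecutive IDs from the generator picked by hashing the
 * destination address, and return the first one (the same contract as
 * ip_idents_reserve()).
 */
static uint16_t ip_id_reserve(uint32_t daddr, uint32_t secret, unsigned int segs)
{
	atomic_uint *slot = &ip_id_slots[toy_hash(daddr, secret) % IP_ID_SLOTS];

	return (uint16_t)atomic_fetch_add(slot, segs);
}

int main(void)
{
	const uint32_t secret = 0x12345678;	/* stand-in for a boot-time random key */
	const uint32_t daddr  = 0xc0a80001;	/* 192.168.0.1 */

	/* Two packets to the same destination draw consecutive IDs from one
	 * slot; a different destination very likely hits a different slot.
	 */
	printf("%u\n", (unsigned int)ip_id_reserve(daddr, secret, 1));
	printf("%u\n", (unsigned int)ip_id_reserve(daddr, secret, 1));
	return 0;
}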

ipv6_select_ident() is put back into net/ipv6/ip6_output.c, where it
belongs (it is only used from that file).

secure_ip_id() and secure_ipv6_id() are no longer needed.

ip_select_ident_more() is renamed to ip_select_ident_segs() and now
takes the number of segments directly, avoiding the needless decrement
at the call sites and the matching re-increment inside the helper.
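
As a sketch of the resulting call pattern (the transmit-path hunk is not among
those shown below, so the exact call site here is an assumption rather than a
quote from this diff):

	/* Illustrative caller: reserve one IP ID per resulting segment in a
	 * single call, with no '- 1' at the call site and no 'more + 1'
	 * inside the helper.
	 */
	ip_select_ident_segs(skb, sk, skb_shinfo(skb)->gso_segs ?: 1);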

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent e067ee33
+1 −1
@@ -281,7 +281,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
	nf_reset(skb);

	skb->ip_summed = CHECKSUM_NONE;
-	ip_select_ident(skb, &rt->dst, NULL);
+	ip_select_ident(skb, NULL);
	ip_send_check(iph);

	ip_local_out(skb);
+3 −20
@@ -41,14 +41,13 @@ struct inet_peer {
		struct rcu_head     gc_rcu;
	};
	/*
-	 * Once inet_peer is queued for deletion (refcnt == -1), following fields
-	 * are not available: rid, ip_id_count
+	 * Once inet_peer is queued for deletion (refcnt == -1), following field
+	 * is not available: rid
	 * We can share memory with rcu_head to help keep inet_peer small.
	 */
	union {
		struct {
			atomic_t			rid;		/* Frag reception counter */
-			atomic_t			ip_id_count;	/* IP ID for the next packet */
		};
		struct rcu_head         rcu;
		struct inet_peer	*gc_next;
@@ -165,7 +164,7 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout);
void inetpeer_invalidate_tree(struct inet_peer_base *);

/*
- * temporary check to make sure we dont access rid, ip_id_count, tcp_ts,
+ * temporary check to make sure we dont access rid, tcp_ts,
 * tcp_ts_stamp if no refcount is taken on inet_peer
 */
static inline void inet_peer_refcheck(const struct inet_peer *p)
@@ -173,20 +172,4 @@ static inline void inet_peer_refcheck(const struct inet_peer *p)
	WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0);
}

-
-/* can be called with or without local BH being disabled */
-static inline int inet_getid(struct inet_peer *p, int more)
-{
-	int old, new;
-	more++;
-	inet_peer_refcheck(p);
-	do {
-		old = atomic_read(&p->ip_id_count);
-		new = old + more;
-		if (!new)
-			new = 1;
-	} while (atomic_cmpxchg(&p->ip_id_count, old, new) != old);
-	return new;
-}
-
#endif /* _NET_INETPEER_H */
+23 −17
@@ -309,9 +309,19 @@ static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb)
	}
}

-void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more);
+#define IP_IDENTS_SZ 2048u
+extern atomic_t *ip_idents;

-static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk)
+static inline u32 ip_idents_reserve(u32 hash, int segs)
+{
+	atomic_t *id_ptr = ip_idents + hash % IP_IDENTS_SZ;
+
+	return atomic_add_return(segs, id_ptr) - segs;
+}
+
+void __ip_select_ident(struct iphdr *iph, int segs);
+
+static inline void ip_select_ident_segs(struct sk_buff *skb, struct sock *sk, int segs)
{
	struct iphdr *iph = ip_hdr(skb);

@@ -321,24 +331,20 @@ static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, s
		 * does not change, they drop every other packet in
		 * a TCP stream using header compression.
		 */
-		iph->id = (sk && inet_sk(sk)->inet_daddr) ?
-					htons(inet_sk(sk)->inet_id++) : 0;
-	} else
-		__ip_select_ident(iph, dst, 0);
-}
-
-static inline void ip_select_ident_more(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk, int more)
-{
-	struct iphdr *iph = ip_hdr(skb);
-
-	if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) {
		if (sk && inet_sk(sk)->inet_daddr) {
			iph->id = htons(inet_sk(sk)->inet_id);
-			inet_sk(sk)->inet_id += 1 + more;
-		} else
+			inet_sk(sk)->inet_id += segs;
+		} else {
			iph->id = 0;
-	} else
-		__ip_select_ident(iph, dst, more);
+		}
+	} else {
+		__ip_select_ident(iph, segs);
+	}
}

+static inline void ip_select_ident(struct sk_buff *skb, struct sock *sk)
+{
+	ip_select_ident_segs(skb, sk, 1);
+}
+
static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto)
+0 −2
@@ -668,8 +668,6 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add
	return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr));
}

-void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt);
-
int ip6_dst_hoplimit(struct dst_entry *dst);

static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6,
+0 −2
@@ -3,8 +3,6 @@

#include <linux/types.h>

-__u32 secure_ip_id(__be32 daddr);
-__u32 secure_ipv6_id(const __be32 daddr[4]);
u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
			       __be16 dport);