Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 4cba259f authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'unified-tunnel-dst-caching'



Paolo Abeni says:

====================
net: unify dst caching for tunnel devices

This patch series tries to unify the dst cache implementations currently
present in the kernel, namely in ip_tunnel.c and ip6_tunnel.c, introducing a
new generic implementation, replacing the existing ones, and then using
the new implementation in other tunnel devices which currently lack it.

The new dst implementation is compiled, as built-in, only if any device using
it is enabled.

Caching the dst for the tunnel remote address gives small, but measurable,
performance improvement when tunneling over ipv4 (in the 2%-4% range) and
significant ones when tunneling over ipv6 (roughly 60% when no
fragmentation/segmentation take place and the tunnel local address
is not specified).

v2:
- move the vxlan dst_cache usage inside the device lookup functions
- fix usage after free for lwt tunnel moving the dst cache storage inside
  the dst_metadata,
- sparse coding style cleanup
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 64f63d59 3c1cb4d2
Loading
Loading
Loading
Loading
+39 −0
Original line number Diff line number Diff line
@@ -72,6 +72,7 @@ struct geneve_dev {
	bool		   collect_md;
	struct gro_cells   gro_cells;
	u32		   flags;
	struct dst_cache   dst_cache;
};

/* Geneve device flags */
@@ -297,6 +298,13 @@ static int geneve_init(struct net_device *dev)
		return err;
	}

	err = dst_cache_init(&geneve->dst_cache, GFP_KERNEL);
	if (err) {
		free_percpu(dev->tstats);
		gro_cells_destroy(&geneve->gro_cells);
		return err;
	}

	return 0;
}

@@ -304,6 +312,7 @@ static void geneve_uninit(struct net_device *dev)
{
	struct geneve_dev *geneve = netdev_priv(dev);

	dst_cache_destroy(&geneve->dst_cache);
	gro_cells_destroy(&geneve->gro_cells);
	free_percpu(dev->tstats);
}
@@ -753,7 +762,9 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
				       struct ip_tunnel_info *info)
{
	struct geneve_dev *geneve = netdev_priv(dev);
	struct dst_cache *dst_cache;
	struct rtable *rt = NULL;
	bool use_cache = true;
	__u8 tos;

	memset(fl4, 0, sizeof(*fl4));
@@ -764,16 +775,26 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
		fl4->daddr = info->key.u.ipv4.dst;
		fl4->saddr = info->key.u.ipv4.src;
		fl4->flowi4_tos = RT_TOS(info->key.tos);
		dst_cache = &info->dst_cache;
	} else {
		tos = geneve->tos;
		if (tos == 1) {
			const struct iphdr *iip = ip_hdr(skb);

			tos = ip_tunnel_get_dsfield(iip, skb);
			use_cache = false;
		}

		fl4->flowi4_tos = RT_TOS(tos);
		fl4->daddr = geneve->remote.sin.sin_addr.s_addr;
		dst_cache = &geneve->dst_cache;
	}

	use_cache = use_cache && !skb->mark;
	if (use_cache) {
		rt = dst_cache_get_ip4(dst_cache, &fl4->saddr);
		if (rt)
			return rt;
	}

	rt = ip_route_output_key(geneve->net, fl4);
@@ -786,6 +807,8 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
		ip_rt_put(rt);
		return ERR_PTR(-ELOOP);
	}
	if (use_cache)
		dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr);
	return rt;
}

@@ -798,6 +821,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
	struct geneve_dev *geneve = netdev_priv(dev);
	struct geneve_sock *gs6 = geneve->sock6;
	struct dst_entry *dst = NULL;
	struct dst_cache *dst_cache;
	bool use_cache = true;
	__u8 prio;

	memset(fl6, 0, sizeof(*fl6));
@@ -808,16 +833,26 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
		fl6->daddr = info->key.u.ipv6.dst;
		fl6->saddr = info->key.u.ipv6.src;
		fl6->flowi6_tos = RT_TOS(info->key.tos);
		dst_cache = &info->dst_cache;
	} else {
		prio = geneve->tos;
		if (prio == 1) {
			const struct iphdr *iip = ip_hdr(skb);

			prio = ip_tunnel_get_dsfield(iip, skb);
			use_cache = false;
		}

		fl6->flowi6_tos = RT_TOS(prio);
		fl6->daddr = geneve->remote.sin6.sin6_addr;
		dst_cache = &geneve->dst_cache;
	}

	use_cache = use_cache && !skb->mark;
	if (use_cache) {
		dst = dst_cache_get_ip6(dst_cache, &fl6->saddr);
		if (dst)
			return dst;
	}

	if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) {
@@ -830,6 +865,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
		return ERR_PTR(-ELOOP);
	}

	if (use_cache)
		dst_cache_set_ip6(dst_cache, dst, &fl6->saddr);
	return dst;
}
#endif
@@ -1272,6 +1309,8 @@ static int geneve_configure(struct net *net, struct net_device *dev,
			return -EPERM;
	}

	dst_cache_reset(&geneve->dst_cache);

	err = register_netdevice(dev);
	if (err)
		return err;
+48 −8
Original line number Diff line number Diff line
@@ -480,6 +480,8 @@ static int vxlan_fdb_replace(struct vxlan_fdb *f,
	rd = list_first_entry_or_null(&f->remotes, struct vxlan_rdst, list);
	if (!rd)
		return 0;

	dst_cache_reset(&rd->dst_cache);
	rd->remote_ip = *ip;
	rd->remote_port = port;
	rd->remote_vni = vni;
@@ -501,6 +503,12 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
	rd = kmalloc(sizeof(*rd), GFP_ATOMIC);
	if (rd == NULL)
		return -ENOBUFS;

	if (dst_cache_init(&rd->dst_cache, GFP_ATOMIC)) {
		kfree(rd);
		return -ENOBUFS;
	}

	rd->remote_ip = *ip;
	rd->remote_port = port;
	rd->remote_vni = vni;
@@ -749,8 +757,10 @@ static void vxlan_fdb_free(struct rcu_head *head)
	struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu);
	struct vxlan_rdst *rd, *nd;

	list_for_each_entry_safe(rd, nd, &f->remotes, list)
	list_for_each_entry_safe(rd, nd, &f->remotes, list) {
		dst_cache_destroy(&rd->dst_cache);
		kfree(rd);
	}
	kfree(f);
}

@@ -1754,11 +1764,24 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,

static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
				      struct sk_buff *skb, int oif, u8 tos,
				      __be32 daddr, __be32 *saddr)
				      __be32 daddr, __be32 *saddr,
				      struct dst_cache *dst_cache,
				      struct ip_tunnel_info *info)
{
	struct rtable *rt = NULL;
	bool use_cache = false;
	struct flowi4 fl4;

	/* When the ip_tunnel_info is available, the tos used for the route
	 * lookup is packet independent, so we can use the cache.
	 */
	if (!skb->mark && (!tos || info)) {
		use_cache = true;
		rt = dst_cache_get_ip4(dst_cache, saddr);
		if (rt)
			return rt;
	}

	memset(&fl4, 0, sizeof(fl4));
	fl4.flowi4_oif = oif;
	fl4.flowi4_tos = RT_TOS(tos);
@@ -1768,8 +1791,11 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
	fl4.saddr = vxlan->cfg.saddr.sin.sin_addr.s_addr;

	rt = ip_route_output_key(vxlan->net, &fl4);
	if (!IS_ERR(rt))
	if (!IS_ERR(rt)) {
		*saddr = fl4.saddr;
		if (use_cache)
			dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
	}
	return rt;
}

@@ -1777,12 +1803,19 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
					  struct sk_buff *skb, int oif,
					  const struct in6_addr *daddr,
					  struct in6_addr *saddr)
					  struct in6_addr *saddr,
					  struct dst_cache *dst_cache)
{
	struct dst_entry *ndst;
	struct flowi6 fl6;
	int err;

	if (!skb->mark) {
		ndst = dst_cache_get_ip6(dst_cache, saddr);
		if (ndst)
			return ndst;
	}

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = oif;
	fl6.daddr = *daddr;
@@ -1797,6 +1830,8 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
		return ERR_PTR(err);

	*saddr = fl6.saddr;
	if (!skb->mark)
		dst_cache_set_ip6(dst_cache, ndst, saddr);
	return ndst;
}
#endif
@@ -1849,6 +1884,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
			   struct vxlan_rdst *rdst, bool did_rsc)
{
	struct dst_cache *dst_cache;
	struct ip_tunnel_info *info;
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct sock *sk;
@@ -1873,6 +1909,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
		dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
		vni = rdst->remote_vni;
		dst = &rdst->remote_ip;
		dst_cache = &rdst->dst_cache;
	} else {
		if (!info) {
			WARN_ONCE(1, "%s: Missing encapsulation instructions\n",
@@ -1887,6 +1924,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
		else
			remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
		dst = &remote_ip;
		dst_cache = &info->dst_cache;
	}

	if (vxlan_addr_any(dst)) {
@@ -1938,7 +1976,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,

		rt = vxlan_get_route(vxlan, skb,
				     rdst ? rdst->remote_ifindex : 0, tos,
				     dst->sin.sin_addr.s_addr, &saddr);
				     dst->sin.sin_addr.s_addr, &saddr,
				     dst_cache, info);
		if (IS_ERR(rt)) {
			netdev_dbg(dev, "no route to %pI4\n",
				   &dst->sin.sin_addr.s_addr);
@@ -1990,7 +2029,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,

		ndst = vxlan6_get_route(vxlan, skb,
					rdst ? rdst->remote_ifindex : 0,
					&dst->sin6.sin6_addr, &saddr);
					&dst->sin6.sin6_addr, &saddr,
					dst_cache);
		if (IS_ERR(ndst)) {
			netdev_dbg(dev, "no route to %pI6\n",
				   &dst->sin6.sin6_addr);
@@ -2331,7 +2371,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
			return -EINVAL;
		rt = vxlan_get_route(vxlan, skb, 0, info->key.tos,
				     info->key.u.ipv4.dst,
				     &info->key.u.ipv4.src);
				     &info->key.u.ipv4.src, NULL, info);
		if (IS_ERR(rt))
			return PTR_ERR(rt);
		ip_rt_put(rt);
@@ -2343,7 +2383,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
			return -EINVAL;
		ndst = vxlan6_get_route(vxlan, skb, 0,
					&info->key.u.ipv6.dst,
					&info->key.u.ipv6.src);
					&info->key.u.ipv6.src, NULL);
		if (IS_ERR(ndst))
			return PTR_ERR(ndst);
		dst_release(ndst);
+97 −0
Original line number Diff line number Diff line
#ifndef _NET_DST_CACHE_H
#define _NET_DST_CACHE_H

#include <linux/jiffies.h>
#include <net/dst.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ip6_fib.h>
#endif

/* Cached destination route for a tunnel endpoint.
 * The storage is per-CPU (struct dst_cache_pcpu is defined in the
 * implementation file); reset_ts supports lazy, lock-free invalidation
 * via dst_cache_reset().
 */
struct dst_cache {
	struct dst_cache_pcpu __percpu *cache;	/* per-CPU cached dst storage */
	unsigned long reset_ts;			/* jiffies timestamp of last reset */
};

/**
 *	dst_cache_get - perform cache lookup
 *	@dst_cache: the cache
 *
 *	The caller should use dst_cache_get_ip4() if it needs to retrieve the
 *	source address to be used when transmitting to the cached dst.
 *	local BH must be disabled.
 */
struct dst_entry *dst_cache_get(struct dst_cache *dst_cache);

/**
 *	dst_cache_get_ip4 - perform cache lookup and fetch ipv4 source address
 *	@dst_cache: the cache
 *	@saddr: return value for the retrieved source address
 *
 *	local BH must be disabled.
 */
struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr);

/**
 *	dst_cache_set_ip4 - store the ipv4 dst into the cache
 *	@dst_cache: the cache
 *	@dst: the entry to be cached
 *	@saddr: the source address to be stored inside the cache
 *
 *	local BH must be disabled.
 */
void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
		       __be32 saddr);

#if IS_ENABLED(CONFIG_IPV6)

/**
 *	dst_cache_set_ip6 - store the ipv6 dst into the cache
 *	@dst_cache: the cache
 *	@dst: the entry to be cached
 *	@addr: the source address to be stored inside the cache
 *
 *	local BH must be disabled.
 */
void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
		       const struct in6_addr *addr);

/**
 *	dst_cache_get_ip6 - perform cache lookup and fetch ipv6 source address
 *	@dst_cache: the cache
 *	@saddr: return value for the retrieved source address
 *
 *	local BH must be disabled.
 */
struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache,
				    struct in6_addr *saddr);
#endif

/**
 *	dst_cache_reset - invalidate the cache contents
 *	@dst_cache: the cache
 *
 *	This does not free the cached dst, to avoid races and contention.
 *	The dst will be freed upon a later cache lookup.
 */
static inline void dst_cache_reset(struct dst_cache *dst_cache)
{
	/* Lazy invalidation: only record the reset time here; entries stored
	 * before this timestamp are discarded (and their dst released) on the
	 * next cache lookup, so no per-CPU walk or locking is needed.
	 */
	dst_cache->reset_ts = jiffies;
}

/**
 *	dst_cache_init - initialize the cache, allocating the required storage
 *	@dst_cache: the cache
 *	@gfp: allocation flags
 */
int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp);

/**
 *	dst_cache_destroy - empty the cache and free the allocated storage
 *	@dst_cache: the cache
 *
 *	No synchronization is enforced: it must be called only when the cache
 *	is no longer in use.
 */
void dst_cache_destroy(struct dst_cache *dst_cache);

#endif
+1 −0
Original line number Diff line number Diff line
@@ -62,6 +62,7 @@ static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a,
		      sizeof(a->u.tun_info) + a->u.tun_info.options_len);
}

void metadata_dst_free(struct metadata_dst *);
struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags);
struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags);

+2 −12
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@
#include <linux/if_tunnel.h>
#include <linux/ip6_tunnel.h>
#include <net/ip_tunnels.h>
#include <net/dst_cache.h>

#define IP6TUNNEL_ERR_TIMEO (30*HZ)

@@ -33,12 +34,6 @@ struct __ip6_tnl_parm {
	__be32			o_key;
};

struct ip6_tnl_dst {
	seqlock_t lock;
	struct dst_entry __rcu *dst;
	u32 cookie;
};

/* IPv6 tunnel */
struct ip6_tnl {
	struct ip6_tnl __rcu *next;	/* next tunnel in list */
@@ -46,7 +41,7 @@ struct ip6_tnl {
	struct net *net;	/* netns for packet i/o */
	struct __ip6_tnl_parm parms;	/* tunnel configuration parameters */
	struct flowi fl;	/* flowi template for xmit */
	struct ip6_tnl_dst __percpu *dst_cache;	/* cached dst */
	struct dst_cache dst_cache;	/* cached dst */

	int err_count;
	unsigned long err_time;
@@ -66,11 +61,6 @@ struct ipv6_tlv_tnl_enc_lim {
	__u8 encap_limit;	/* tunnel encapsulation limit   */
} __packed;

struct dst_entry *ip6_tnl_dst_get(struct ip6_tnl *t);
int ip6_tnl_dst_init(struct ip6_tnl *t);
void ip6_tnl_dst_destroy(struct ip6_tnl *t);
void ip6_tnl_dst_reset(struct ip6_tnl *t);
void ip6_tnl_dst_set(struct ip6_tnl *t, struct dst_entry *dst);
int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr,
		const struct in6_addr *raddr);
int ip6_tnl_xmit_ctl(struct ip6_tnl *t, const struct in6_addr *laddr,
Loading