Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d5a915b9 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'gre-add-collect_md-mode-for-ERSPAN-tunnel'



William Tu says:

====================
gre: add collect_md mode for ERSPAN tunnel

This patch series provide collect_md mode for ERSPAN tunnel.  The fist patch
refactors the existing gre_fb_xmit function by exacting the route cache
portion into a new function called prepare_fb_xmit.  The second patch
introduces the collect_md mode for ERSPAN tunnel, by calling the
prepare_fb_xmit function and adding ERSPAN specific logic.  The final patch
adds the test case using bpf_skb_{set,get}_tunnel_{key,opt}.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 03157937 ef88f89c
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -154,8 +154,10 @@ struct ip_tunnel {
#define TUNNEL_GENEVE_OPT	__cpu_to_be16(0x0800)
#define TUNNEL_VXLAN_OPT	__cpu_to_be16(0x1000)
#define TUNNEL_NOCACHE		__cpu_to_be16(0x2000)
#define TUNNEL_ERSPAN_OPT	__cpu_to_be16(0x4000)

#define TUNNEL_OPTIONS_PRESENT	(TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)
#define TUNNEL_OPTIONS_PRESENT \
		(TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT)

struct tnl_ptk_info {
	__be16 flags;
+138 −19
Original line number Diff line number Diff line
@@ -113,6 +113,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
				__be32 id, u32 index, bool truncate);

static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
@@ -287,7 +289,33 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
					   false, false) < 0)
			goto drop;

		if (tunnel->collect_md) {
			struct ip_tunnel_info *info;
			struct erspan_metadata *md;
			__be64 tun_id;
			__be16 flags;

			tpi->flags |= TUNNEL_KEY;
			flags = tpi->flags;
			tun_id = key32_to_tunnel_id(tpi->key);

			tun_dst = ip_tun_rx_dst(skb, flags,
						tun_id, sizeof(*md));
			if (!tun_dst)
				return PACKET_REJECT;

			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
			if (!md)
				return PACKET_REJECT;

			md->index = index;
			info = &tun_dst->u.tun_info;
			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
			info->options_len = sizeof(*md);
		} else {
			tunnel->index = ntohl(index);
		}

		skb_reset_mac_header(skb);
		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
		return PACKET_RCVD;
@@ -432,39 +460,33 @@ static struct rtable *gre_get_rt(struct sk_buff *skb,
	return ip_route_output_key(net, fl);
}

static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			__be16 proto)
static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
				      struct net_device *dev,
				      struct flowi4 *fl,
				      int tunnel_hlen)
{
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;
	struct flowi4 fl;
	int min_headroom;
	int tunnel_hlen;
	__be16 df, flags;
	bool use_cache;
	int err;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);

	if (use_cache)
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr);
		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
	if (!rt) {
		rt = gre_get_rt(skb, dev, &fl, key);
		rt = gre_get_rt(skb, dev, fl, key);
		if (IS_ERR(rt))
			goto err_free_skb;
		if (use_cache)
			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
					  fl.saddr);
					  fl->saddr);
	}

	tunnel_hlen = gre_calc_hlen(key->tun_flags);

	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
			+ tunnel_hlen + sizeof(struct iphdr);
	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
@@ -476,6 +498,37 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
		if (unlikely(err))
			goto err_free_rt;
	}
	return rt;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NULL;
}

static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			__be16 proto)
{
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;
	struct flowi4 fl;
	int tunnel_hlen;
	__be16 df, flags;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;
	tunnel_hlen = gre_calc_hlen(key->tun_flags);

	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
	if (!rt)
		return;

	/* Push Tunnel header. */
	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
@@ -498,6 +551,64 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
	dev->stats.tx_dropped++;
}

static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
			   __be16 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct erspan_metadata *md;
	struct rtable *rt = NULL;
	bool truncate = false;
	struct flowi4 fl;
	int tunnel_hlen;
	__be16 df;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto err_free_skb;

	key = &tun_info->key;

	/* ERSPAN has fixed 8 byte GRE header */
	tunnel_hlen = 8 + sizeof(struct erspanhdr);

	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
	if (!rt)
		return;

	if (gre_handle_offloads(skb, false))
		goto err_free_rt;

	if (skb->len > dev->mtu) {
		pskb_trim(skb, dev->mtu);
		truncate = true;
	}

	md = ip_tunnel_info_opts(tun_info);
	if (!md)
		goto err_free_rt;

	erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
			    ntohl(md->index), truncate);

	gre_build_header(skb, 8, TUNNEL_SEQ,
			 htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);
	return;

err_free_rt:
	ip_rt_put(rt);
err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
}

static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct ip_tunnel_info *info = skb_tunnel_info(skb);
@@ -611,6 +722,11 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
	struct ip_tunnel *tunnel = netdev_priv(dev);
	bool truncate = false;

	if (tunnel->collect_md) {
		erspan_fb_xmit(skb, dev, skb->protocol);
		return NETDEV_TX_OK;
	}

	if (gre_handle_offloads(skb, false))
		goto free_skb;

@@ -973,9 +1089,12 @@ static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
		return ret;

	/* ERSPAN should only have GRE sequence and key flag */
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (flags != (GRE_SEQ | GRE_KEY))
	if (!data[IFLA_GRE_COLLECT_METADATA] &&
	    flags != (GRE_SEQ | GRE_KEY))
		return -EINVAL;

	/* ERSPAN Session ID only has 10-bit. Since we reuse
+62 −1
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@
#include <uapi/linux/pkt_cls.h>
#include <net/ipv6.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"

#define _htonl __builtin_bswap32
#define ERROR(ret) do {\
@@ -38,6 +39,10 @@ struct vxlan_metadata {
	u32     gbp;
};

struct erspan_metadata {
	__be32 index;
};

SEC("gre_set_tunnel")
int _gre_set_tunnel(struct __sk_buff *skb)
{
@@ -76,6 +81,63 @@ int _gre_get_tunnel(struct __sk_buff *skb)
	return TC_ACT_OK;
}

SEC("erspan_set_tunnel")
int _erspan_set_tunnel(struct __sk_buff *skb)
{
	struct bpf_tunnel_key key;
	struct erspan_metadata md;
	int ret;

	__builtin_memset(&key, 0x0, sizeof(key));
	key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
	key.tunnel_id = 2;
	key.tunnel_tos = 0;
	key.tunnel_ttl = 64;

	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
	if (ret < 0) {
		ERROR(ret);
		return TC_ACT_SHOT;
	}

	md.index = htonl(123);
	ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
	if (ret < 0) {
		ERROR(ret);
		return TC_ACT_SHOT;
	}

	return TC_ACT_OK;
}

SEC("erspan_get_tunnel")
int _erspan_get_tunnel(struct __sk_buff *skb)
{
	char fmt[] = "key %d remote ip 0x%x erspan index 0x%x\n";
	struct bpf_tunnel_key key;
	struct erspan_metadata md;
	u32 index;
	int ret;

	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
	if (ret < 0) {
		ERROR(ret);
		return TC_ACT_SHOT;
	}

	ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
	if (ret < 0) {
		ERROR(ret);
		return TC_ACT_SHOT;
	}

	index = bpf_ntohl(md.index);
	bpf_trace_printk(fmt, sizeof(fmt),
			key.tunnel_id, key.remote_ipv4, index);

	return TC_ACT_OK;
}

SEC("vxlan_set_tunnel")
int _vxlan_set_tunnel(struct __sk_buff *skb)
{
@@ -378,5 +440,4 @@ int _ip6ip6_get_tunnel(struct __sk_buff *skb)
	return TC_ACT_OK;
}


char _license[] SEC("license") = "GPL";
+29 −0
Original line number Diff line number Diff line
@@ -32,6 +32,19 @@ function add_gre_tunnel {
	ip addr add dev $DEV 10.1.1.200/24
}

function add_erspan_tunnel {
	# in namespace
	ip netns exec at_ns0 \
		ip link add dev $DEV_NS type $TYPE seq key 2 local 172.16.1.100 remote 172.16.1.200 erspan 123
	ip netns exec at_ns0 ip link set dev $DEV_NS up
	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24

	# out of namespace
	ip link add dev $DEV type $TYPE external
	ip link set dev $DEV up
	ip addr add dev $DEV 10.1.1.200/24
}

function add_vxlan_tunnel {
	# Set static ARP entry here because iptables set-mark works
	# on L3 packet, as a result not applying to ARP packets,
@@ -99,6 +112,18 @@ function test_gre {
	cleanup
}

function test_erspan {
	TYPE=erspan
	DEV_NS=erspan00
	DEV=erspan11
	config_device
	add_erspan_tunnel
	attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel
	ping -c 1 10.1.1.100
	ip netns exec at_ns0 ping -c 1 10.1.1.200
	cleanup
}

function test_vxlan {
	TYPE=vxlan
	DEV_NS=vxlan00
@@ -151,14 +176,18 @@ function cleanup {
	ip link del gretap11
	ip link del vxlan11
	ip link del geneve11
	ip link del erspan11
	pkill tcpdump
	pkill cat
	set -ex
}

trap cleanup 0 2 3 6 9
cleanup
echo "Testing GRE tunnel..."
test_gre
echo "Testing ERSPAN tunnel..."
test_erspan
echo "Testing VXLAN tunnel..."
test_vxlan
echo "Testing GENEVE tunnel..."