Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2e62fa69 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'vxlan_group_policy_extension'

Thomas Graf says:

====================
VXLAN Group Policy Extension

Implements supports for the Group Policy VXLAN extension [0] to provide
a lightweight and simple security label mechanism across network peers
based on VXLAN. The security context and associated metadata is mapped
to/from skb->mark. This allows further mapping to a SELinux context
using SECMARK, to implement ACLs directly with nftables, iptables, OVS,
tc, etc.

The extension is disabled by default and should be run on a distinct
port in mixed Linux VXLAN VTEP environments. Liberal VXLAN VTEPs
which ignore unknown reserved bits will be able to receive VXLAN-GBP
frames.

Simple usage example:

10.1.1.1:
   # ip link add vxlan0 type vxlan id 10 remote 10.1.1.2 gbp
   # iptables -I OUTPUT -m owner --uid-owner 101 -j MARK --set-mark 0x200

10.1.1.2:
   # ip link add vxlan0 type vxlan id 10 remote 10.1.1.1 gbp
   # iptables -I INPUT -m mark --mark 0x200 -j DROP

iproute2 [1] and OVS [2] support will be provided in separate patches.

[0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy
[1] https://github.com/tgraf/iproute2/tree/vxlan-gbp
[2] https://github.com/tgraf/ovs/tree/vxlan-gbp


====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 3f3558bb 1dd144cf
Loading
Loading
Loading
Loading
+89 −24
Original line number Diff line number Diff line
@@ -263,15 +263,19 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
	return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
}

/* Find VXLAN socket based on network namespace, address family and UDP port */
static struct vxlan_sock *vxlan_find_sock(struct net *net,
					  sa_family_t family, __be16 port)
/* Find VXLAN socket based on network namespace, address family and UDP port
 * and enabled unshareable flags.
 */
static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
					  __be16 port, u32 flags)
{
	struct vxlan_sock *vs;
	u32 match_flags = flags & VXLAN_F_UNSHAREABLE;

	hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
		if (inet_sk(vs->sock->sk)->inet_sport == port &&
		    inet_sk(vs->sock->sk)->sk.sk_family == family)
		    inet_sk(vs->sock->sk)->sk.sk_family == family &&
		    (vs->flags & VXLAN_F_UNSHAREABLE) == match_flags)
			return vs;
	}
	return NULL;
@@ -291,11 +295,12 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)

/* Look up VNI in a per net namespace table */
static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id,
					sa_family_t family, __be16 port)
					sa_family_t family, __be16 port,
					u32 flags)
{
	struct vxlan_sock *vs;

	vs = vxlan_find_sock(net, family, port);
	vs = vxlan_find_sock(net, family, port, flags);
	if (!vs)
		return NULL;

@@ -620,7 +625,8 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
			continue;

		vh2 = (struct vxlanhdr *)(p->data + off_vx);
		if (vh->vx_vni != vh2->vx_vni) {
		if (vh->vx_flags != vh2->vx_flags ||
		    vh->vx_vni != vh2->vx_vni) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}
@@ -1183,6 +1189,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
	struct vxlan_sock *vs;
	struct vxlanhdr *vxh;
	u32 flags, vni;
	struct vxlan_metadata md = {0};

	/* Need Vxlan and inner Ethernet header to be present */
	if (!pskb_may_pull(skb, VXLAN_HLEN))
@@ -1216,6 +1223,24 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
		vni &= VXLAN_VID_MASK;
	}

	/* For backwards compatibility, only allow reserved fields to be
	 * used by VXLAN extensions if explicitly requested.
	 */
	if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) {
		struct vxlanhdr_gbp *gbp;

		gbp = (struct vxlanhdr_gbp *)vxh;
		md.gbp = ntohs(gbp->policy_id);

		if (gbp->dont_learn)
			md.gbp |= VXLAN_GBP_DONT_LEARN;

		if (gbp->policy_applied)
			md.gbp |= VXLAN_GBP_POLICY_APPLIED;

		flags &= ~VXLAN_GBP_USED_BITS;
	}

	if (flags || (vni & ~VXLAN_VID_MASK)) {
		/* If there are any unprocessed flags remaining treat
		 * this as a malformed packet. This behavior diverges from
@@ -1229,7 +1254,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
		goto bad_flags;
	}

	vs->rcv(vs, skb, vxh->vx_vni);
	md.vni = vxh->vx_vni;
	vs->rcv(vs, skb, &md);
	return 0;

drop:
@@ -1246,8 +1272,8 @@ error:
	return 1;
}

static void vxlan_rcv(struct vxlan_sock *vs,
		      struct sk_buff *skb, __be32 vx_vni)
static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
		      struct vxlan_metadata *md)
{
	struct iphdr *oip = NULL;
	struct ipv6hdr *oip6 = NULL;
@@ -1258,7 +1284,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
	int err = 0;
	union vxlan_addr *remote_ip;

	vni = ntohl(vx_vni) >> 8;
	vni = ntohl(md->vni) >> 8;
	/* Is this VNI defined? */
	vxlan = vxlan_vs_find_vni(vs, vni);
	if (!vxlan)
@@ -1292,6 +1318,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
		goto drop;

	skb_reset_network_header(skb);
	skb->mark = md->gbp;

	if (oip6)
		err = IP6_ECN_decapsulate(oip6, skb);
@@ -1641,13 +1668,30 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
	return false;
}

static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, struct vxlan_sock *vs,
				struct vxlan_metadata *md)
{
	struct vxlanhdr_gbp *gbp;

	gbp = (struct vxlanhdr_gbp *)vxh;
	vxh->vx_flags |= htonl(VXLAN_HF_GBP);

	if (md->gbp & VXLAN_GBP_DONT_LEARN)
		gbp->dont_learn = 1;

	if (md->gbp & VXLAN_GBP_POLICY_APPLIED)
		gbp->policy_applied = 1;

	gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
}

#if IS_ENABLED(CONFIG_IPV6)
static int vxlan6_xmit_skb(struct vxlan_sock *vs,
			   struct dst_entry *dst, struct sk_buff *skb,
			   struct net_device *dev, struct in6_addr *saddr,
			   struct in6_addr *daddr, __u8 prio, __u8 ttl,
			   __be16 src_port, __be16 dst_port, __be32 vni,
			   bool xnet)
			   __be16 src_port, __be16 dst_port,
			   struct vxlan_metadata *md, bool xnet)
{
	struct vxlanhdr *vxh;
	int min_headroom;
@@ -1696,7 +1740,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,

	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
	vxh->vx_flags = htonl(VXLAN_HF_VNI);
	vxh->vx_vni = vni;
	vxh->vx_vni = md->vni;

	if (type & SKB_GSO_TUNNEL_REMCSUM) {
		u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
@@ -1714,6 +1758,9 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
		}
	}

	if (vs->flags & VXLAN_F_GBP)
		vxlan_build_gbp_hdr(vxh, vs, md);

	skb_set_inner_protocol(skb, htons(ETH_P_TEB));

	udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio,
@@ -1728,7 +1775,8 @@ err:
int vxlan_xmit_skb(struct vxlan_sock *vs,
		   struct rtable *rt, struct sk_buff *skb,
		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
		   __be16 src_port, __be16 dst_port, __be32 vni, bool xnet)
		   __be16 src_port, __be16 dst_port,
		   struct vxlan_metadata *md, bool xnet)
{
	struct vxlanhdr *vxh;
	int min_headroom;
@@ -1771,7 +1819,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,

	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
	vxh->vx_flags = htonl(VXLAN_HF_VNI);
	vxh->vx_vni = vni;
	vxh->vx_vni = md->vni;

	if (type & SKB_GSO_TUNNEL_REMCSUM) {
		u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
@@ -1789,6 +1837,9 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
		}
	}

	if (vs->flags & VXLAN_F_GBP)
		vxlan_build_gbp_hdr(vxh, vs, md);

	skb_set_inner_protocol(skb, htons(ETH_P_TEB));

	return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos,
@@ -1849,6 +1900,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
	const struct iphdr *old_iph;
	struct flowi4 fl4;
	union vxlan_addr *dst;
	struct vxlan_metadata md;
	__be16 src_port = 0, dst_port;
	u32 vni;
	__be16 df = 0;
@@ -1910,7 +1962,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,

			ip_rt_put(rt);
			dst_vxlan = vxlan_find_vni(vxlan->net, vni,
						   dst->sa.sa_family, dst_port);
						   dst->sa.sa_family, dst_port,
						   vxlan->flags);
			if (!dst_vxlan)
				goto tx_error;
			vxlan_encap_bypass(skb, vxlan, dst_vxlan);
@@ -1919,11 +1972,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,

		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
		md.vni = htonl(vni << 8);
		md.gbp = skb->mark;

		err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb,
				     fl4.saddr, dst->sin.sin_addr.s_addr,
				     tos, ttl, df, src_port, dst_port,
				     htonl(vni << 8),
				     tos, ttl, df, src_port, dst_port, &md,
				     !net_eq(vxlan->net, dev_net(vxlan->dev)));
		if (err < 0) {
			/* skb is already freed. */
@@ -1968,7 +2022,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,

			dst_release(ndst);
			dst_vxlan = vxlan_find_vni(vxlan->net, vni,
						   dst->sa.sa_family, dst_port);
						   dst->sa.sa_family, dst_port,
						   vxlan->flags);
			if (!dst_vxlan)
				goto tx_error;
			vxlan_encap_bypass(skb, vxlan, dst_vxlan);
@@ -1976,10 +2031,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
		}

		ttl = ttl ? : ip6_dst_hoplimit(ndst);
		md.vni = htonl(vni << 8);
		md.gbp = skb->mark;

		err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb,
				      dev, &fl6.saddr, &fl6.daddr, 0, ttl,
				      src_port, dst_port, htonl(vni << 8),
				      src_port, dst_port, &md,
				      !net_eq(vxlan->net, dev_net(vxlan->dev)));
#endif
	}
@@ -2136,7 +2193,7 @@ static int vxlan_init(struct net_device *dev)

	spin_lock(&vn->sock_lock);
	vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
			     vxlan->dst_port);
			     vxlan->dst_port, vxlan->flags);
	if (vs && atomic_add_unless(&vs->refcnt, 1, 0)) {
		/* If we have a socket with same port already, reuse it */
		vxlan_vs_add_dev(vs, vxlan);
@@ -2382,6 +2439,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
	[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]	= { .type = NLA_U8 },
	[IFLA_VXLAN_REMCSUM_TX]	= { .type = NLA_U8 },
	[IFLA_VXLAN_REMCSUM_RX]	= { .type = NLA_U8 },
	[IFLA_VXLAN_GBP]	= { .type = NLA_FLAG, },
};

static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -2542,7 +2600,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
		return vs;

	spin_lock(&vn->sock_lock);
	vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port);
	vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port, flags);
	if (vs && ((vs->rcv != rcv) ||
		   !atomic_add_unless(&vs->refcnt, 1, 0)))
			vs = ERR_PTR(-EBUSY);
@@ -2706,8 +2764,11 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
	    nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
		vxlan->flags |= VXLAN_F_REMCSUM_RX;

	if (data[IFLA_VXLAN_GBP])
		vxlan->flags |= VXLAN_F_GBP;

	if (vxlan_find_vni(net, vni, use_ipv6 ? AF_INET6 : AF_INET,
			   vxlan->dst_port)) {
			   vxlan->dst_port, vxlan->flags)) {
		pr_info("duplicate VNI %u\n", vni);
		return -EEXIST;
	}
@@ -2851,6 +2912,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
	if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
		goto nla_put_failure;

	if (vxlan->flags & VXLAN_F_GBP &&
	    nla_put_flag(skb, IFLA_VXLAN_GBP))
		goto nla_put_failure;

	return 0;

nla_put_failure:
+4 −1
Original line number Diff line number Diff line
@@ -97,7 +97,10 @@ struct ip_tunnel {
#define TUNNEL_DONT_FRAGMENT    __cpu_to_be16(0x0100)
#define TUNNEL_OAM		__cpu_to_be16(0x0200)
#define TUNNEL_CRIT_OPT		__cpu_to_be16(0x0400)
#define TUNNEL_OPTIONS_PRESENT	__cpu_to_be16(0x0800)
#define TUNNEL_GENEVE_OPT	__cpu_to_be16(0x0800)
#define TUNNEL_VXLAN_OPT	__cpu_to_be16(0x1000)

#define TUNNEL_OPTIONS_PRESENT	(TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)

struct tnl_ptk_info {
	__be16 flags;
+77 −5
Original line number Diff line number Diff line
@@ -11,15 +11,76 @@
#define VNI_HASH_BITS	10
#define VNI_HASH_SIZE	(1<<VNI_HASH_BITS)

/* VXLAN protocol header */
/*
 * VXLAN Group Based Policy Extension:
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |1|-|-|-|1|-|-|-|R|D|R|R|A|R|R|R|        Group Policy ID        |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |                VXLAN Network Identifier (VNI) |   Reserved    |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *
 * D = Don't Learn bit. When set, this bit indicates that the egress
 *     VTEP MUST NOT learn the source address of the encapsulated frame.
 *
 * A = Indicates that the group policy has already been applied to
 *     this packet. Policies MUST NOT be applied by devices when the
 *     A bit is set.
 *
 * [0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy
 */
struct vxlanhdr_gbp {
	__u8	vx_flags;
#ifdef __LITTLE_ENDIAN_BITFIELD
	__u8	reserved_flags1:3,
		policy_applied:1,
		reserved_flags2:2,
		dont_learn:1,
		reserved_flags3:1;
#elif defined(__BIG_ENDIAN_BITFIELD)
	__u8	reserved_flags1:1,
		dont_learn:1,
		reserved_flags2:2,
		policy_applied:1,
		reserved_flags3:3;
#else
#error	"Please fix <asm/byteorder.h>"
#endif
	__be16	policy_id;
	__be32	vx_vni;
};

#define VXLAN_GBP_USED_BITS (VXLAN_HF_GBP | 0xFFFFFF)

/* skb->mark mapping
 *
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |R|R|R|R|R|R|R|R|R|D|R|R|A|R|R|R|        Group Policy ID        |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 */
#define VXLAN_GBP_DONT_LEARN		(BIT(6) << 16)
#define VXLAN_GBP_POLICY_APPLIED	(BIT(3) << 16)
#define VXLAN_GBP_ID_MASK		(0xFFFF)

/* VXLAN protocol header:
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |G|R|R|R|I|R|R|C|               Reserved                        |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |                VXLAN Network Identifier (VNI) |   Reserved    |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *
 * G = 1	Group Policy (VXLAN-GBP)
 * I = 1	VXLAN Network Identifier (VNI) present
 * C = 1	Remote checksum offload (RCO)
 */
struct vxlanhdr {
	__be32 vx_flags;
	__be32 vx_vni;
};

/* VXLAN header flags. */
#define VXLAN_HF_VNI 0x08000000
#define VXLAN_HF_RCO 0x00200000
#define VXLAN_HF_RCO BIT(24)
#define VXLAN_HF_VNI BIT(27)
#define VXLAN_HF_GBP BIT(31)

/* Remote checksum offload header option */
#define VXLAN_RCO_MASK  0x7f    /* Last byte of vni field */
@@ -32,8 +93,14 @@ struct vxlanhdr {
#define VXLAN_VID_MASK  (VXLAN_N_VID - 1)
#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))

struct vxlan_metadata {
	__be32		vni;
	u32		gbp;
};

struct vxlan_sock;
typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, __be32 key);
typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb,
			   struct vxlan_metadata *md);

/* per UDP socket information */
struct vxlan_sock {
@@ -60,6 +127,10 @@ struct vxlan_sock {
#define VXLAN_F_UDP_ZERO_CSUM6_RX	0x100
#define VXLAN_F_REMCSUM_TX		0x200
#define VXLAN_F_REMCSUM_RX		0x400
#define VXLAN_F_GBP			0x800

/* These flags must match in order for a socket to be shareable */
#define VXLAN_F_UNSHAREABLE		VXLAN_F_GBP

struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
				  vxlan_rcv_t *rcv, void *data,
@@ -70,7 +141,8 @@ void vxlan_sock_release(struct vxlan_sock *vs);
int vxlan_xmit_skb(struct vxlan_sock *vs,
		   struct rtable *rt, struct sk_buff *skb,
		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
		   __be16 src_port, __be16 dst_port, __be32 vni, bool xnet);
		   __be16 src_port, __be16 dst_port, struct vxlan_metadata *md,
		   bool xnet);

static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
						     netdev_features_t features)
+1 −0
Original line number Diff line number Diff line
@@ -372,6 +372,7 @@ enum {
	IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
	IFLA_VXLAN_REMCSUM_TX,
	IFLA_VXLAN_REMCSUM_RX,
	IFLA_VXLAN_GBP,
	__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)
+11 −0
Original line number Diff line number Diff line
@@ -252,11 +252,21 @@ enum ovs_vport_attr {

#define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1)

enum {
	OVS_VXLAN_EXT_UNSPEC,
	OVS_VXLAN_EXT_GBP,	/* Flag or __u32 */
	__OVS_VXLAN_EXT_MAX,
};

#define OVS_VXLAN_EXT_MAX (__OVS_VXLAN_EXT_MAX - 1)


/* OVS_VPORT_ATTR_OPTIONS attributes for tunnels.
 */
enum {
	OVS_TUNNEL_ATTR_UNSPEC,
	OVS_TUNNEL_ATTR_DST_PORT, /* 16-bit UDP port, used by L4 tunnels. */
	OVS_TUNNEL_ATTR_EXTENSION,
	__OVS_TUNNEL_ATTR_MAX
};

@@ -328,6 +338,7 @@ enum ovs_tunnel_key_attr {
	OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,        /* Array of Geneve options. */
	OVS_TUNNEL_KEY_ATTR_TP_SRC,		/* be16 src Transport Port. */
	OVS_TUNNEL_KEY_ATTR_TP_DST,		/* be16 dst Transport Port. */
	OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS,		/* Nested OVS_VXLAN_EXT_* */
	__OVS_TUNNEL_KEY_ATTR_MAX
};

Loading