
Commit 563b4495 authored by David S. Miller

Merge branch 'openvswitch_vxlan'



Pravin B Shelar says:

====================
openvswitch: VXLAN tunneling.

The first four vxlan patches extend vxlan so that openvswitch
can share the vxlan receive code. The remaining patches refactor
the vxlan data plane so that OVS can share that code with the
vxlan module. The last patch adds a vxlan vport to openvswitch.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 7559fb2f 58264848
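For orientation, the receive-path sharing described in the series works through the API exported by the new include/net/vxlan.h shown further down: a consumer registers a per-port UDP socket together with its own receive callback, and drops the reference on teardown. The sketch below is illustrative only and not part of the merged series; the names my_vxlan_rcv, my_vxlan_attach and my_vxlan_detach are hypothetical.

```c
#include <linux/err.h>
#include <linux/skbuff.h>
#include <net/vxlan.h>

/* Per-consumer receive hook, invoked by the shared UDP socket after the
 * outer UDP/VXLAN headers have been validated and pulled. */
static void my_vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
			 __be32 vx_vni)
{
	u32 vni = ntohl(vx_vni) >> 8;	/* VNI is the upper 24 bits */

	/* ... hand the inner Ethernet frame to the consumer's datapath ... */
	(void)vni;
	kfree_skb(skb);
}

static struct vxlan_sock *my_vxlan_attach(struct net *net, __be16 udp_port)
{
	/* no_share = false: if a socket is already bound to this port and
	 * its rcv callback matches, its refcount is bumped and the socket
	 * is reused; returns an ERR_PTR() on failure. */
	return vxlan_sock_add(net, udp_port, my_vxlan_rcv, NULL, false);
}

static void my_vxlan_detach(struct vxlan_sock *vs)
{
	vxlan_sock_release(vs);	/* socket is freed on the last put */
}
```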
drivers/net/vxlan.c
+202 −140
@@ -27,6 +27,7 @@
#include <linux/igmp.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/hash.h>
#include <linux/ethtool.h>
#include <net/arp.h>
@@ -41,6 +42,7 @@
#include <net/inet_ecn.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/vxlan.h>

#define VXLAN_VERSION	"0.1"

@@ -57,6 +59,7 @@
#define VXLAN_VID_MASK	(VXLAN_N_VID - 1)
/* IP header + UDP + VXLAN + Ethernet header */
#define VXLAN_HEADROOM (20 + 8 + 8 + 14)
#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))

#define VXLAN_FLAGS 0x08000000	/* struct vxlanhdr.vx_flags required value. */

@@ -82,16 +85,6 @@ static int vxlan_net_id;

static const u8 all_zeros_mac[ETH_ALEN];

/* per UDP socket information */
struct vxlan_sock {
	struct hlist_node hlist;
	struct rcu_head	  rcu;
	struct work_struct del_work;
	atomic_t	  refcnt;
	struct socket	  *sock;
	struct hlist_head vni_list[VNI_HASH_SIZE];
};

/* per-network namespace private data for this module */
struct vxlan_net {
	struct list_head  vxlan_list;
@@ -188,7 +181,7 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
}

/* Find VXLAN socket based on network namespace and UDP port */
static struct vxlan_sock *vxlan_find_port(struct net *net, __be16 port)
static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port)
{
	struct vxlan_sock *vs;

@@ -199,16 +192,10 @@ static struct vxlan_sock *vxlan_find_port(struct net *net, __be16 port)
	return NULL;
}

/* Look up VNI in a per net namespace table */
static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, __be16 port)
static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)
{
	struct vxlan_sock *vs;
	struct vxlan_dev *vxlan;

	vs = vxlan_find_port(net, port);
	if (!vs)
		return NULL;

	hlist_for_each_entry_rcu(vxlan, vni_head(vs, id), hlist) {
		if (vxlan->default_dst.remote_vni == id)
			return vxlan;
@@ -217,6 +204,18 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, __be16 port)
	return NULL;
}

/* Look up VNI in a per net namespace table */
static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, __be16 port)
{
	struct vxlan_sock *vs;

	vs = vxlan_find_sock(net, port);
	if (!vs)
		return NULL;

	return vxlan_vs_find_vni(vs, id);
}

/* Fill in neighbour message in skbuff. */
static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
			  const struct vxlan_fdb *fdb,
@@ -802,8 +801,10 @@ static void vxlan_sock_hold(struct vxlan_sock *vs)
	atomic_inc(&vs->refcnt);
}

static void vxlan_sock_release(struct vxlan_net *vn, struct vxlan_sock *vs)
void vxlan_sock_release(struct vxlan_sock *vs)
{
	struct vxlan_net *vn = net_generic(sock_net(vs->sock->sk), vxlan_net_id);

	if (!atomic_dec_and_test(&vs->refcnt))
		return;

@@ -813,6 +814,7 @@ static void vxlan_sock_release(struct vxlan_net *vn, struct vxlan_sock *vs)

	queue_work(vxlan_wq, &vs->del_work);
}
EXPORT_SYMBOL_GPL(vxlan_sock_release);

/* Callback to update multicast group membership when first VNI on
 * multicast asddress is brought up
@@ -821,7 +823,6 @@ static void vxlan_sock_release(struct vxlan_net *vn, struct vxlan_sock *vs)
static void vxlan_igmp_join(struct work_struct *work)
{
	struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_join);
	struct vxlan_net *vn = net_generic(dev_net(vxlan->dev), vxlan_net_id);
	struct vxlan_sock *vs = vxlan->vn_sock;
	struct sock *sk = vs->sock->sk;
	struct ip_mreqn mreq = {
@@ -833,7 +834,7 @@ static void vxlan_igmp_join(struct work_struct *work)
	ip_mc_join_group(sk, &mreq);
	release_sock(sk);

	vxlan_sock_release(vn, vs);
	vxlan_sock_release(vs);
	dev_put(vxlan->dev);
}

@@ -841,7 +842,6 @@ static void vxlan_igmp_join(struct work_struct *work)
static void vxlan_igmp_leave(struct work_struct *work)
{
	struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_leave);
	struct vxlan_net *vn = net_generic(dev_net(vxlan->dev), vxlan_net_id);
	struct vxlan_sock *vs = vxlan->vn_sock;
	struct sock *sk = vs->sock->sk;
	struct ip_mreqn mreq = {
@@ -853,30 +853,23 @@ static void vxlan_igmp_leave(struct work_struct *work)
	ip_mc_leave_group(sk, &mreq);
	release_sock(sk);

	vxlan_sock_release(vn, vs);
	vxlan_sock_release(vs);
	dev_put(vxlan->dev);
}

/* Callback from net/ipv4/udp.c to receive packets */
static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
	struct iphdr *oip;
	struct vxlan_sock *vs;
	struct vxlanhdr *vxh;
	struct vxlan_dev *vxlan;
	struct pcpu_tstats *stats;
	__be16 port;
	__u32 vni;
	int err;

	/* pop off outer UDP header */
	__skb_pull(skb, sizeof(struct udphdr));

	/* Need Vxlan and inner Ethernet header to be present */
	if (!pskb_may_pull(skb, sizeof(struct vxlanhdr)))
	if (!pskb_may_pull(skb, VXLAN_HLEN))
		goto error;

	/* Drop packets with reserved bits set */
	vxh = (struct vxlanhdr *) skb->data;
	/* Return packets with reserved bits set */
	vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
	if (vxh->vx_flags != htonl(VXLAN_FLAGS) ||
	    (vxh->vx_vni & htonl(0xff))) {
		netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n",
@@ -884,28 +877,44 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
		goto error;
	}

	__skb_pull(skb, sizeof(struct vxlanhdr));
	if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))
		goto drop;

	/* Is this VNI defined? */
	vni = ntohl(vxh->vx_vni) >> 8;
	port = inet_sk(sk)->inet_sport;
	vxlan = vxlan_find_vni(sock_net(sk), vni, port);
	if (!vxlan) {
		netdev_dbg(skb->dev, "unknown vni %d port %u\n",
			   vni, ntohs(port));

	vs = vxlan_find_sock(sock_net(sk), port);
	if (!vs)
		goto drop;

	vs->rcv(vs, skb, vxh->vx_vni);
	return 0;

drop:
	/* Consume bad packet */
	kfree_skb(skb);
	return 0;

error:
	/* Return non vxlan pkt */
	return 1;
}

	if (!pskb_may_pull(skb, ETH_HLEN)) {
		vxlan->dev->stats.rx_length_errors++;
		vxlan->dev->stats.rx_errors++;
static void vxlan_rcv(struct vxlan_sock *vs,
		      struct sk_buff *skb, __be32 vx_vni)
{
	struct iphdr *oip;
	struct vxlan_dev *vxlan;
	struct pcpu_tstats *stats;
	__u32 vni;
	int err;

	vni = ntohl(vx_vni) >> 8;
	/* Is this VNI defined? */
	vxlan = vxlan_vs_find_vni(vs, vni);
	if (!vxlan)
		goto drop;
	}

	skb_reset_mac_header(skb);

	/* Re-examine inner Ethernet packet */
	oip = ip_hdr(skb);
	skb->protocol = eth_type_trans(skb, vxlan->dev);

	/* Ignore packet loops (and multicast echo) */
@@ -913,11 +922,12 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
			       vxlan->dev->dev_addr) == 0)
		goto drop;

	/* Re-examine inner Ethernet packet */
	oip = ip_hdr(skb);
	if ((vxlan->flags & VXLAN_F_LEARN) &&
	    vxlan_snoop(skb->dev, oip->saddr, eth_hdr(skb)->h_source))
		goto drop;

	__skb_tunnel_rx(skb, vxlan->dev);
	skb_reset_network_header(skb);

	/* If the NIC driver gave us an encapsulated packet with
@@ -951,16 +961,10 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)

	netif_rx(skb);

	return 0;
error:
	/* Put UDP header back */
	__skb_push(skb, sizeof(struct udphdr));

	return 1;
	return;
drop:
	/* Consume bad packet */
	kfree_skb(skb);
	return 0;
}

static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
@@ -1079,11 +1083,8 @@ static void vxlan_sock_put(struct sk_buff *skb)
}

/* On transmit, associate with the tunnel socket */
static void vxlan_set_owner(struct net_device *dev, struct sk_buff *skb)
static void vxlan_set_owner(struct sock *sk, struct sk_buff *skb)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct sock *sk = vxlan->vn_sock->sock->sk;

	skb_orphan(skb);
	sock_hold(sk);
	skb->sk = sk;
@@ -1095,9 +1096,9 @@ static void vxlan_set_owner(struct net_device *dev, struct sk_buff *skb)
 *     better and maybe available from hardware
 *   secondary choice is to use jhash on the Ethernet header
 */
static __be16 vxlan_src_port(const struct vxlan_dev *vxlan, struct sk_buff *skb)
__be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb)
{
	unsigned int range = (vxlan->port_max - vxlan->port_min) + 1;
	unsigned int range = (port_max - port_min) + 1;
	u32 hash;

	hash = skb_get_rxhash(skb);
@@ -1105,8 +1106,9 @@ static __be16 vxlan_src_port(const struct vxlan_dev *vxlan, struct sk_buff *skb)
		hash = jhash(skb->data, 2 * ETH_ALEN,
			     (__force u32) skb->protocol);

	return htons((((u64) hash * range) >> 32) + vxlan->port_min);
	return htons((((u64) hash * range) >> 32) + port_min);
}
EXPORT_SYMBOL_GPL(vxlan_src_port);

static int handle_offloads(struct sk_buff *skb)
{
@@ -1122,6 +1124,64 @@ static int handle_offloads(struct sk_buff *skb)
	return 0;
}

int vxlan_xmit_skb(struct net *net, struct vxlan_sock *vs,
		   struct rtable *rt, struct sk_buff *skb,
		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
		   __be16 src_port, __be16 dst_port, __be32 vni)
{
	struct vxlanhdr *vxh;
	struct udphdr *uh;
	int min_headroom;
	int err;

	if (!skb->encapsulation) {
		skb_reset_inner_headers(skb);
		skb->encapsulation = 1;
	}

	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
			+ VXLAN_HLEN + sizeof(struct iphdr)
			+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);

	/* Need space for new headers (invalidates iph ptr) */
	err = skb_cow_head(skb, min_headroom);
	if (unlikely(err))
		return err;

	if (vlan_tx_tag_present(skb)) {
		if (WARN_ON(!__vlan_put_tag(skb,
					    skb->vlan_proto,
					    vlan_tx_tag_get(skb))))
			return -ENOMEM;

		skb->vlan_tci = 0;
	}

	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
	vxh->vx_flags = htonl(VXLAN_FLAGS);
	vxh->vx_vni = vni;

	__skb_push(skb, sizeof(*uh));
	skb_reset_transport_header(skb);
	uh = udp_hdr(skb);

	uh->dest = dst_port;
	uh->source = src_port;

	uh->len = htons(skb->len);
	uh->check = 0;

	vxlan_set_owner(vs->sock->sk, skb);

	err = handle_offloads(skb);
	if (err)
		return err;

	return iptunnel_xmit(net, rt, skb, src, dst,
			IPPROTO_UDP, tos, ttl, df);
}
EXPORT_SYMBOL_GPL(vxlan_xmit_skb);

/* Bypass encapsulation if the destination is local */
static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
			       struct vxlan_dev *dst_vxlan)
@@ -1159,8 +1219,6 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct rtable *rt;
	const struct iphdr *old_iph;
	struct vxlanhdr *vxh;
	struct udphdr *uh;
	struct flowi4 fl4;
	__be32 dst;
	__be16 src_port, dst_port;
@@ -1182,15 +1240,6 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
		goto drop;
	}

	if (!skb->encapsulation) {
		skb_reset_inner_headers(skb);
		skb->encapsulation = 1;
	}

	/* Need space for new headers (invalidates iph ptr) */
	if (skb_cow_head(skb, VXLAN_HEADROOM))
		goto drop;

	old_iph = ip_hdr(skb);

	ttl = vxlan->ttl;
@@ -1201,7 +1250,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
	if (tos == 1)
		tos = ip_tunnel_get_dsfield(old_iph, skb);

	src_port = vxlan_src_port(vxlan, skb);
	src_port = vxlan_src_port(vxlan->port_min, vxlan->port_max, skb);

	memset(&fl4, 0, sizeof(fl4));
	fl4.flowi4_oif = rdst->remote_ifindex;
@@ -1218,9 +1267,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,

	if (rt->dst.dev == dev) {
		netdev_dbg(dev, "circular route to %pI4\n", &dst);
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
		goto rt_tx_error;
	}

	/* Bypass encapsulation if the destination is local */
@@ -1235,30 +1283,16 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
		vxlan_encap_bypass(skb, vxlan, dst_vxlan);
		return;
	}
	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
	vxh->vx_flags = htonl(VXLAN_FLAGS);
	vxh->vx_vni = htonl(vni << 8);

	__skb_push(skb, sizeof(*uh));
	skb_reset_transport_header(skb);
	uh = udp_hdr(skb);

	uh->dest = dst_port;
	uh->source = src_port;

	uh->len = htons(skb->len);
	uh->check = 0;

	vxlan_set_owner(dev, skb);

	if (handle_offloads(skb))
		goto drop;

	tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
	ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);

	err = iptunnel_xmit(dev_net(dev), rt, skb, fl4.saddr, dst,
			    IPPROTO_UDP, tos, ttl, df);
	err = vxlan_xmit_skb(dev_net(dev), vxlan->vn_sock, rt, skb,
			     fl4.saddr, dst, tos, ttl, df,
			     src_port, dst_port, htonl(vni << 8));

	if (err < 0)
		goto rt_tx_error;
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;
@@ -1267,6 +1301,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
	dev->stats.tx_dropped++;
	goto tx_free;

rt_tx_error:
	ip_rt_put(rt);
tx_error:
	dev->stats.tx_errors++;
tx_free:
@@ -1365,25 +1401,31 @@ static void vxlan_cleanup(unsigned long arg)
	mod_timer(&vxlan->age_timer, next_timer);
}

static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
{
	__u32 vni = vxlan->default_dst.remote_vni;

	vxlan->vn_sock = vs;
	hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
}

/* Setup stats when device is created */
static int vxlan_init(struct net_device *dev)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
	struct vxlan_sock *vs;
	__u32 vni = vxlan->default_dst.remote_vni;

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	spin_lock(&vn->sock_lock);
	vs = vxlan_find_port(dev_net(dev), vxlan->dst_port);
	vs = vxlan_find_sock(dev_net(dev), vxlan->dst_port);
	if (vs) {
		/* If we have a socket with same port already, reuse it */
		atomic_inc(&vs->refcnt);
		vxlan->vn_sock = vs;
		hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
		vxlan_vs_add_dev(vs, vxlan);
	} else {
		/* otherwise make new socket outside of RTNL */
		dev_hold(dev);
@@ -1408,13 +1450,12 @@ static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan)
static void vxlan_uninit(struct net_device *dev)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
	struct vxlan_sock *vs = vxlan->vn_sock;

	vxlan_fdb_delete_default(vxlan);

	if (vs)
		vxlan_sock_release(vn, vs);
		vxlan_sock_release(vs);
	free_percpu(dev->tstats);
}

@@ -1530,8 +1571,11 @@ static void vxlan_setup(struct net_device *dev)
	dev->features   |= NETIF_F_RXCSUM;
	dev->features   |= NETIF_F_GSO_SOFTWARE;

	dev->vlan_features = dev->features;
	dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
	dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
	dev->hw_features |= NETIF_F_GSO_SOFTWARE;
	dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
	dev->priv_flags	&= ~IFF_XMIT_DST_RELEASE;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;

@@ -1631,8 +1675,10 @@ static void vxlan_del_work(struct work_struct *work)
	kfree_rcu(vs, rcu);
}

static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port)
static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
					      vxlan_rcv_t *rcv, void *data)
{
	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
	struct vxlan_sock *vs;
	struct sock *sk;
	struct sockaddr_in vxlan_addr = {
@@ -1644,8 +1690,10 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port)
	unsigned int h;

	vs = kmalloc(sizeof(*vs), GFP_KERNEL);
	if (!vs)
	if (!vs) {
		pr_debug("memory alocation failure\n");
		return ERR_PTR(-ENOMEM);
	}

	for (h = 0; h < VNI_HASH_SIZE; ++h)
		INIT_HLIST_HEAD(&vs->vni_list[h]);
@@ -1673,57 +1721,70 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port)
		kfree(vs);
		return ERR_PTR(rc);
	}
	atomic_set(&vs->refcnt, 1);
	vs->rcv = rcv;
	vs->data = data;

	/* Disable multicast loopback */
	inet_sk(sk)->mc_loop = 0;
	spin_lock(&vn->sock_lock);
	hlist_add_head_rcu(&vs->hlist, vs_head(net, port));
	spin_unlock(&vn->sock_lock);

	/* Mark socket as an encapsulation socket. */
	udp_sk(sk)->encap_type = 1;
	udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv;
	udp_encap_enable();
	atomic_set(&vs->refcnt, 1);
	return vs;
}

struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
				  vxlan_rcv_t *rcv, void *data,
				  bool no_share)
{
	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
	struct vxlan_sock *vs;

	vs = vxlan_socket_create(net, port, rcv, data);
	if (!IS_ERR(vs))
		return vs;

	if (no_share)	/* Return error if sharing is not allowed. */
		return vs;

	spin_lock(&vn->sock_lock);
	vs = vxlan_find_sock(net, port);
	if (vs) {
		if (vs->rcv == rcv)
			atomic_inc(&vs->refcnt);
		else
			vs = ERR_PTR(-EBUSY);
	}
	spin_unlock(&vn->sock_lock);

	if (!vs)
		vs = ERR_PTR(-EINVAL);

	return vs;
}
EXPORT_SYMBOL_GPL(vxlan_sock_add);

/* Scheduled at device creation to bind to a socket */
static void vxlan_sock_work(struct work_struct *work)
{
	struct vxlan_dev *vxlan
		= container_of(work, struct vxlan_dev, sock_work);
	struct net_device *dev = vxlan->dev;
	struct net *net = dev_net(dev);
	__u32 vni = vxlan->default_dst.remote_vni;
	__be16 port = vxlan->dst_port;
	struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, sock_work);
	struct net *net = dev_net(vxlan->dev);
	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
	struct vxlan_sock *nvs, *ovs;

	nvs = vxlan_socket_create(net, port);
	if (IS_ERR(nvs)) {
		netdev_err(vxlan->dev, "Can not create UDP socket, %ld\n",
			   PTR_ERR(nvs));
		goto out;
	}
	__be16 port = vxlan->dst_port;
	struct vxlan_sock *nvs;

	nvs = vxlan_sock_add(net, port, vxlan_rcv, NULL, false);
	spin_lock(&vn->sock_lock);
	/* Look again to see if can reuse socket */
	ovs = vxlan_find_port(net, port);
	if (ovs) {
		atomic_inc(&ovs->refcnt);
		vxlan->vn_sock = ovs;
		hlist_add_head_rcu(&vxlan->hlist, vni_head(ovs, vni));
	if (!IS_ERR(nvs))
		vxlan_vs_add_dev(nvs, vxlan);
	spin_unlock(&vn->sock_lock);

		sk_release_kernel(nvs->sock->sk);
		kfree(nvs);
	} else {
		vxlan->vn_sock = nvs;
		hlist_add_head_rcu(&nvs->hlist, vs_head(net, port));
		hlist_add_head_rcu(&vxlan->hlist, vni_head(nvs, vni));
		spin_unlock(&vn->sock_lock);
	}
out:
	dev_put(dev);
	dev_put(vxlan->dev);
}

static int vxlan_newlink(struct net *net, struct net_device *dev,
@@ -1838,6 +1899,7 @@ static void vxlan_dellink(struct net_device *dev, struct list_head *head)
	struct vxlan_dev *vxlan = netdev_priv(dev);

	spin_lock(&vn->sock_lock);
	if (!hlist_unhashed(&vxlan->hlist))
		hlist_del_rcu(&vxlan->hlist);
	spin_unlock(&vn->sock_lock);

include/net/vxlan.h

0 → 100644
+39 −0
#ifndef __NET_VXLAN_H
#define __NET_VXLAN_H 1

#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/udp.h>

#define VNI_HASH_BITS	10
#define VNI_HASH_SIZE	(1<<VNI_HASH_BITS)

struct vxlan_sock;
typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, __be32 key);

/* per UDP socket information */
struct vxlan_sock {
	struct hlist_node hlist;
	vxlan_rcv_t	 *rcv;
	void		 *data;
	struct work_struct del_work;
	struct socket	 *sock;
	struct rcu_head	  rcu;
	struct hlist_head vni_list[VNI_HASH_SIZE];
	atomic_t	  refcnt;
};

struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
				  vxlan_rcv_t *rcv, void *data,
				  bool no_share);

void vxlan_sock_release(struct vxlan_sock *vs);

int vxlan_xmit_skb(struct net *net, struct vxlan_sock *vs,
		   struct rtable *rt, struct sk_buff *skb,
		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
		   __be16 src_port, __be16 dst_port, __be32 vni);

__be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb);

#endif
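On the transmit side, the same header exposes vxlan_src_port() and vxlan_xmit_skb(), which carry the encapsulation work that previously lived inline in vxlan_xmit_one(). A hedged sketch of how a caller might use them follows, assuming the route lookup and socket setup have already been done; my_vxlan_send and the 32768-61000 source-port range are illustrative, not part of this series.

```c
#include <net/route.h>
#include <net/vxlan.h>

static int my_vxlan_send(struct net *net, struct vxlan_sock *vs,
			 struct rtable *rt, struct sk_buff *skb,
			 __be32 saddr, __be32 daddr, __be16 dst_port,
			 u32 vni)
{
	/* Hash the inner flow into a configurable UDP source-port range
	 * so ECMP/RSS can spread encapsulated traffic. */
	__be16 src_port = vxlan_src_port(32768, 61000, skb);

	/* The VNI travels in the upper 24 bits of the vx_vni word, hence
	 * htonl(vni << 8), matching vxlan_xmit_one() above. Returns the
	 * transmitted length or a negative error, like iptunnel_xmit(). */
	return vxlan_xmit_skb(net, vs, rt, skb, saddr, daddr,
			      0, 64, 0,		/* tos, ttl, df */
			      src_port, dst_port, htonl(vni << 8));
}
```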
include/uapi/linux/openvswitch.h
+11 −0
@@ -165,6 +165,7 @@ enum ovs_vport_type {
	OVS_VPORT_TYPE_NETDEV,   /* network device */
	OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */
	OVS_VPORT_TYPE_GRE,      /* GRE tunnel. */
	OVS_VPORT_TYPE_VXLAN,	 /* VXLAN tunnel. */
	__OVS_VPORT_TYPE_MAX
};

@@ -211,6 +212,16 @@ enum ovs_vport_attr {

#define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1)

/* OVS_VPORT_ATTR_OPTIONS attributes for tunnels.
 */
enum {
	OVS_TUNNEL_ATTR_UNSPEC,
	OVS_TUNNEL_ATTR_DST_PORT, /* 16-bit UDP port, used by L4 tunnels. */
	__OVS_TUNNEL_ATTR_MAX
};

#define OVS_TUNNEL_ATTR_MAX (__OVS_TUNNEL_ATTR_MAX - 1)

/* Flows. */

#define OVS_FLOW_FAMILY  "ovs_flow"
net/openvswitch/Kconfig
+13 −0
@@ -40,3 +40,16 @@ config OPENVSWITCH_GRE
	  Say N to exclude this support and reduce the binary size.

	  If unsure, say Y.

config OPENVSWITCH_VXLAN
	bool "Open vSwitch VXLAN tunneling support"
	depends on INET
	depends on OPENVSWITCH
	depends on VXLAN && !(OPENVSWITCH=y && VXLAN=m)
	default y
	---help---
	  If you say Y here, then the Open vSwitch will be able create vxlan vport.

	  Say N to exclude this support and reduce the binary size.

	  If unsure, say Y.
net/openvswitch/Makefile
+4 −0
@@ -13,3 +13,7 @@ openvswitch-y := \
	vport-gre.o \
	vport-internal_dev.o \
	vport-netdev.o

ifneq ($(CONFIG_OPENVSWITCH_VXLAN),)
openvswitch-y += vport-vxlan.o
endif