Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 614732ea authored by Thomas Graf's avatar Thomas Graf Committed by David S. Miller
Browse files

openvswitch: Use regular VXLAN net_device device



This gets rid of all OVS specific VXLAN code in the receive and
transmit path by using a VXLAN net_device to represent the vport.
Only a small shim layer remains which takes care of handling the
VXLAN specific OVS Netlink configuration.

Unexports vxlan_sock_add(), vxlan_sock_release(), vxlan_xmit_skb()
since they are no longer needed.

Signed-off-by: default avatarThomas Graf <tgraf@suug.ch>
Signed-off-by: default avatarPravin B Shelar <pshelar@nicira.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent c9db965c
Loading
Loading
Loading
Loading
+116 −126
Original line number Diff line number Diff line
@@ -75,6 +75,9 @@ static struct rtnl_link_ops vxlan_link_ops;

static const u8 all_zeros_mac[ETH_ALEN];

static struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
					 bool no_share, u32 flags);

/* per-network namespace private data for this module */
struct vxlan_net {
	struct list_head  vxlan_list;
@@ -1027,7 +1030,7 @@ static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
	return false;
}

void vxlan_sock_release(struct vxlan_sock *vs)
static void vxlan_sock_release(struct vxlan_sock *vs)
{
	struct sock *sk = vs->sock->sk;
	struct net *net = sock_net(sk);
@@ -1043,7 +1046,6 @@ void vxlan_sock_release(struct vxlan_sock *vs)

	queue_work(vxlan_wq, &vs->del_work);
}
EXPORT_SYMBOL_GPL(vxlan_sock_release);

/* Update multicast group membership when first VNI on
 * multicast address is brought up
@@ -1126,6 +1128,102 @@ static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh,
	return vh;
}

static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
		      struct vxlan_metadata *md, u32 vni,
		      struct metadata_dst *tun_dst)
{
	struct iphdr *oip = NULL;
	struct ipv6hdr *oip6 = NULL;
	struct vxlan_dev *vxlan;
	struct pcpu_sw_netstats *stats;
	union vxlan_addr saddr;
	int err = 0;
	union vxlan_addr *remote_ip;

	/* For flow based devices, map all packets to VNI 0 */
	if (vs->flags & VXLAN_F_FLOW_BASED)
		vni = 0;

	/* Is this VNI defined? */
	vxlan = vxlan_vs_find_vni(vs, vni);
	if (!vxlan)
		goto drop;

	remote_ip = &vxlan->default_dst.remote_ip;
	skb_reset_mac_header(skb);
	skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev)));
	skb->protocol = eth_type_trans(skb, vxlan->dev);
	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);

	/* Ignore packet loops (and multicast echo) */
	if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr))
		goto drop;

	/* Re-examine inner Ethernet packet */
	if (remote_ip->sa.sa_family == AF_INET) {
		oip = ip_hdr(skb);
		saddr.sin.sin_addr.s_addr = oip->saddr;
		saddr.sa.sa_family = AF_INET;
#if IS_ENABLED(CONFIG_IPV6)
	} else {
		oip6 = ipv6_hdr(skb);
		saddr.sin6.sin6_addr = oip6->saddr;
		saddr.sa.sa_family = AF_INET6;
#endif
	}

	if (tun_dst) {
		skb_dst_set(skb, (struct dst_entry *)tun_dst);
		tun_dst = NULL;
	}

	if ((vxlan->flags & VXLAN_F_LEARN) &&
	    vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source))
		goto drop;

	skb_reset_network_header(skb);
	/* In flow-based mode, GBP is carried in dst_metadata */
	if (!(vs->flags & VXLAN_F_FLOW_BASED))
		skb->mark = md->gbp;

	if (oip6)
		err = IP6_ECN_decapsulate(oip6, skb);
	if (oip)
		err = IP_ECN_decapsulate(oip, skb);

	if (unlikely(err)) {
		if (log_ecn_error) {
			if (oip6)
				net_info_ratelimited("non-ECT from %pI6\n",
						     &oip6->saddr);
			if (oip)
				net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
						     &oip->saddr, oip->tos);
		}
		if (err > 1) {
			++vxlan->dev->stats.rx_frame_errors;
			++vxlan->dev->stats.rx_errors;
			goto drop;
		}
	}

	stats = this_cpu_ptr(vxlan->dev->tstats);
	u64_stats_update_begin(&stats->syncp);
	stats->rx_packets++;
	stats->rx_bytes += skb->len;
	u64_stats_update_end(&stats->syncp);

	netif_rx(skb);

	return;
drop:
	if (tun_dst)
		dst_release((struct dst_entry *)tun_dst);

	/* Consume bad packet */
	kfree_skb(skb);
}

/* Callback from net/ipv4/udp.c to receive packets */
static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
{
@@ -1192,7 +1290,6 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
			info->key.tun_flags |= TUNNEL_CSUM;

		md = ip_tunnel_info_opts(info, sizeof(*md));
		md->tun_dst = tun_dst;
	} else {
		memset(md, 0, sizeof(*md));
	}
@@ -1231,8 +1328,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
		goto bad_flags;
	}

	md->vni = vxh->vx_vni;
	vs->rcv(vs, skb, md);
	vxlan_rcv(vs, skb, md, vni >> 8, tun_dst);
	return 0;

drop:
@@ -1252,104 +1348,6 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
	return 1;
}

static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
		      struct vxlan_metadata *md)
{
	struct iphdr *oip = NULL;
	struct ipv6hdr *oip6 = NULL;
	struct vxlan_dev *vxlan;
	struct pcpu_sw_netstats *stats;
	union vxlan_addr saddr;
	__u32 vni;
	int err = 0;
	union vxlan_addr *remote_ip;

	/* For flow based devices, map all packets to VNI 0 */
	if (vs->flags & VXLAN_F_FLOW_BASED)
		vni = 0;
	else
		vni = ntohl(md->vni) >> 8;

	/* Is this VNI defined? */
	vxlan = vxlan_vs_find_vni(vs, vni);
	if (!vxlan)
		goto drop;

	remote_ip = &vxlan->default_dst.remote_ip;
	skb_reset_mac_header(skb);
	skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev)));
	skb->protocol = eth_type_trans(skb, vxlan->dev);
	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);

	/* Ignore packet loops (and multicast echo) */
	if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr))
		goto drop;

	/* Re-examine inner Ethernet packet */
	if (remote_ip->sa.sa_family == AF_INET) {
		oip = ip_hdr(skb);
		saddr.sin.sin_addr.s_addr = oip->saddr;
		saddr.sa.sa_family = AF_INET;
#if IS_ENABLED(CONFIG_IPV6)
	} else {
		oip6 = ipv6_hdr(skb);
		saddr.sin6.sin6_addr = oip6->saddr;
		saddr.sa.sa_family = AF_INET6;
#endif
	}

	if (md->tun_dst) {
		skb_dst_set(skb, (struct dst_entry *)md->tun_dst);
		md->tun_dst = NULL;
	}

	if ((vxlan->flags & VXLAN_F_LEARN) &&
	    vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source))
		goto drop;

	skb_reset_network_header(skb);
	/* In flow-based mode, GBP is carried in dst_metadata */
	if (!(vs->flags & VXLAN_F_FLOW_BASED))
		skb->mark = md->gbp;

	if (oip6)
		err = IP6_ECN_decapsulate(oip6, skb);
	if (oip)
		err = IP_ECN_decapsulate(oip, skb);

	if (unlikely(err)) {
		if (log_ecn_error) {
			if (oip6)
				net_info_ratelimited("non-ECT from %pI6\n",
						     &oip6->saddr);
			if (oip)
				net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
						     &oip->saddr, oip->tos);
		}
		if (err > 1) {
			++vxlan->dev->stats.rx_frame_errors;
			++vxlan->dev->stats.rx_errors;
			goto drop;
		}
	}

	stats = this_cpu_ptr(vxlan->dev->tstats);
	u64_stats_update_begin(&stats->syncp);
	stats->rx_packets++;
	stats->rx_bytes += skb->len;
	u64_stats_update_end(&stats->syncp);

	netif_rx(skb);

	return;
drop:
	if (md->tun_dst)
		dst_release((struct dst_entry *)md->tun_dst);

	/* Consume bad packet */
	kfree_skb(skb);
}

static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
@@ -1688,7 +1686,7 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk,
			   struct sk_buff *skb,
			   struct net_device *dev, struct in6_addr *saddr,
			   struct in6_addr *daddr, __u8 prio, __u8 ttl,
			   __be16 src_port, __be16 dst_port,
			   __be16 src_port, __be16 dst_port, __u32 vni,
			   struct vxlan_metadata *md, bool xnet, u32 vxflags)
{
	struct vxlanhdr *vxh;
@@ -1738,7 +1736,7 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk,

	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
	vxh->vx_flags = htonl(VXLAN_HF_VNI);
	vxh->vx_vni = md->vni;
	vxh->vx_vni = vni;

	if (type & SKB_GSO_TUNNEL_REMCSUM) {
		u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
@@ -1771,9 +1769,9 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk,
}
#endif

int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
static int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
			  __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
		   __be16 src_port, __be16 dst_port,
			  __be16 src_port, __be16 dst_port, __u32 vni,
			  struct vxlan_metadata *md, bool xnet, u32 vxflags)
{
	struct vxlanhdr *vxh;
@@ -1817,7 +1815,7 @@ int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,

	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
	vxh->vx_flags = htonl(VXLAN_HF_VNI);
	vxh->vx_vni = md->vni;
	vxh->vx_vni = vni;

	if (type & SKB_GSO_TUNNEL_REMCSUM) {
		u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
@@ -1844,7 +1842,6 @@ int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
				   ttl, df, src_port, dst_port, xnet,
				   !(vxflags & VXLAN_F_UDP_CSUM));
}
EXPORT_SYMBOL_GPL(vxlan_xmit_skb);

/* Bypass encapsulation if the destination is local */
static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
@@ -2012,10 +2009,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,

		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
		md->vni = htonl(vni << 8);
		err = vxlan_xmit_skb(rt, sk, skb, fl4.saddr,
				     dst->sin.sin_addr.s_addr, tos, ttl, df,
				     src_port, dst_port, md,
				     src_port, dst_port, htonl(vni << 8), md,
				     !net_eq(vxlan->net, dev_net(vxlan->dev)),
				     flags);
		if (err < 0) {
@@ -2070,11 +2066,10 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
		}

		ttl = ttl ? : ip6_dst_hoplimit(ndst);
		md->vni = htonl(vni << 8);
		md->gbp = skb->mark;

		err = vxlan6_xmit_skb(ndst, sk, skb, dev, &fl6.saddr, &fl6.daddr,
				      0, ttl, src_port, dst_port, md,
				      0, ttl, src_port, dst_port, htonl(vni << 8), md,
				      !net_eq(vxlan->net, dev_net(vxlan->dev)),
				      vxlan->flags);
#endif
@@ -2269,8 +2264,8 @@ static int vxlan_open(struct net_device *dev)
	struct vxlan_sock *vs;
	int ret = 0;

	vs = vxlan_sock_add(vxlan->net, vxlan->cfg.dst_port, vxlan_rcv,
			    NULL, vxlan->cfg.no_share, vxlan->flags);
	vs = vxlan_sock_add(vxlan->net, vxlan->cfg.dst_port,
			    vxlan->cfg.no_share, vxlan->flags);
	if (IS_ERR(vs))
		return PTR_ERR(vs);

@@ -2563,7 +2558,6 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,

/* Create new listen socket if needed */
static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
					      vxlan_rcv_t *rcv, void *data,
					      u32 flags)
{
	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
@@ -2592,8 +2586,6 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,

	vs->sock = sock;
	atomic_set(&vs->refcnt, 1);
	vs->rcv = rcv;
	vs->data = data;
	vs->flags = (flags & VXLAN_F_RCV_FLAGS);

	/* Initialize the vxlan udp offloads structure */
@@ -2617,8 +2609,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
	return vs;
}

struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
				  vxlan_rcv_t *rcv, void *data,
static struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
					 bool no_share, u32 flags)
{
	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
@@ -2629,7 +2620,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
		spin_lock(&vn->sock_lock);
		vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port,
				     flags);
		if (vs && vs->rcv == rcv) {
		if (vs) {
			if (!atomic_add_unless(&vs->refcnt, 1, 0))
				vs = ERR_PTR(-EBUSY);
			spin_unlock(&vn->sock_lock);
@@ -2638,9 +2629,8 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
		spin_unlock(&vn->sock_lock);
	}

	return vxlan_socket_create(net, port, rcv, data, flags);
	return vxlan_socket_create(net, port, flags);
}
EXPORT_SYMBOL_GPL(vxlan_sock_add);

static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
			       struct vxlan_config *conf)
+1 −0
Original line number Diff line number Diff line
@@ -141,6 +141,7 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
				    unsigned char name_assign_type,
				    const struct rtnl_link_ops *ops,
				    struct nlattr *tb[]);
int rtnl_delete_link(struct net_device *dev);
int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm);

int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len);
+4 −20
Original line number Diff line number Diff line
@@ -101,22 +101,12 @@ struct vxlanhdr {
#define FDB_HASH_SIZE	(1<<FDB_HASH_BITS)

struct vxlan_metadata {
	__be32		vni;
	u32		gbp;

	/* Temporary until vxlan_rcv() API is gone */
	struct metadata_dst *tun_dst;
};

struct vxlan_sock;
typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb,
			   struct vxlan_metadata *md);

/* per UDP socket information */
struct vxlan_sock {
	struct hlist_node hlist;
	vxlan_rcv_t	 *rcv;
	void		 *data;
	struct work_struct del_work;
	struct socket	 *sock;
	struct rcu_head	  rcu;
@@ -203,19 +193,13 @@ struct vxlan_dev {
					 VXLAN_F_COLLECT_METADATA |	\
					 VXLAN_F_FLOW_BASED)

struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
				  vxlan_rcv_t *rcv, void *data,
				  bool no_share, u32 flags);

struct net_device *vxlan_dev_create(struct net *net, const char *name,
				    u8 name_assign_type, struct vxlan_config *conf);

void vxlan_sock_release(struct vxlan_sock *vs);

int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
		   __be16 src_port, __be16 dst_port, struct vxlan_metadata *md,
		   bool xnet, u32 vxflags);
static inline __be16 vxlan_dev_dst_port(struct vxlan_dev *vxlan)
{
	return inet_sk(vxlan->vn_sock->sock->sk)->inet_sport;
}

static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
						     netdev_features_t features)
+17 −9
Original line number Diff line number Diff line
@@ -1960,16 +1960,30 @@ static int rtnl_group_dellink(const struct net *net, int group)
	return 0;
}

int rtnl_delete_link(struct net_device *dev)
{
	const struct rtnl_link_ops *ops;
	LIST_HEAD(list_kill);

	ops = dev->rtnl_link_ops;
	if (!ops || !ops->dellink)
		return -EOPNOTSUPP;

	ops->dellink(dev, &list_kill);
	unregister_netdevice_many(&list_kill);

	return 0;
}
EXPORT_SYMBOL_GPL(rtnl_delete_link);

static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	const struct rtnl_link_ops *ops;
	struct net_device *dev;
	struct ifinfomsg *ifm;
	char ifname[IFNAMSIZ];
	struct nlattr *tb[IFLA_MAX+1];
	int err;
	LIST_HEAD(list_kill);

	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
	if (err < 0)
@@ -1991,13 +2005,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
	if (!dev)
		return -ENODEV;

	ops = dev->rtnl_link_ops;
	if (!ops || !ops->dellink)
		return -EOPNOTSUPP;

	ops->dellink(dev, &list_kill);
	unregister_netdevice_many(&list_kill);
	return 0;
	return rtnl_delete_link(dev);
}

int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
+0 −12
Original line number Diff line number Diff line
@@ -44,18 +44,6 @@ config OPENVSWITCH_GRE

	  If unsure, say Y.

config OPENVSWITCH_VXLAN
	tristate "Open vSwitch VXLAN tunneling support"
	depends on OPENVSWITCH
	depends on VXLAN
	default OPENVSWITCH
	---help---
	  If you say Y here, then the Open vSwitch will be able create vxlan vport.

	  Say N to exclude this support and reduce the binary size.

	  If unsure, say Y.

config OPENVSWITCH_GENEVE
	tristate "Open vSwitch Geneve tunneling support"
	depends on OPENVSWITCH
Loading