
Commit dbbd136c authored by David S. Miller

Merge branch 'tunnels'



Pravin B Shelar says:

====================
The following patch series restructures the GRE and IPIP tunneling code
to make it modular. It adds an ip_tunnel module which acts as a
generic tunneling layer holding the common code.

These patches do not change any functionality.
v3-v4:
 - Fixed compilation error in ipv6.
 - Few coding style fixes.
v2-v3:
 - Use GPL exports for all export symbols.
 - Set default config NET_IP_TUNNEL to m.
v1-v2:
 - Dropped patch to convert gre_proto_lock to rtnl lock.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents eaac5f3d f61dd388
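
The series moves per-driver TOS/ECN helpers and per-cpu stats handling into this shared layer; the diffs below show vxlan being converted as the first consumer. As a minimal sketch (not code from this commit; the function and parameter names are illustrative), this is how an encapsulating driver is expected to use the new helpers when building its outer IPv4 header:

#include <net/ip_tunnels.h>

/* Sketch only: "example_build_outer_tos" and "cfg_tos" are illustrative
 * names.  Assumes the driver treats a configured TOS of 1 as "inherit
 * from the inner packet", as vxlan does below. */
static void example_build_outer_tos(struct sk_buff *skb, struct iphdr *iph,
				    const struct iphdr *old_iph, u8 cfg_tos)
{
	u8 tos = cfg_tos;

	if (tos == 1)
		tos = ip_tunnel_get_dsfield(old_iph, skb);

	/* combine the chosen DSCP value with the inner packet's ECN bits */
	iph->tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
}

Per-device counters likewise move to the generic dev->tstats (struct pcpu_tstats), so a driver can point .ndo_get_stats64 at the shared ip_tunnel_get_stats64() instead of keeping its own aggregation loop, which is exactly what the vxlan diff below does.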
drivers/net/Kconfig  +1 −0
@@ -151,6 +151,7 @@ config MACVTAP
config VXLAN
       tristate "Virtual eXtensible Local Area Network (VXLAN)"
       depends on INET
       select NET_IP_TUNNEL
       ---help---
	  This allows one to create vxlan virtual interfaces that provide
	  Layer 2 Networks over Layer 3 Networks. VXLAN is often used
drivers/net/vxlan.c  +10 −90
@@ -33,7 +33,7 @@
#include <net/arp.h>
#include <net/ndisc.h>
#include <net/ip.h>
#include <net/ipip.h>
#include <net/ip_tunnels.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/rtnetlink.h>
@@ -101,20 +101,10 @@ struct vxlan_fdb {
	u8		  eth_addr[ETH_ALEN];
};

/* Per-cpu network traffic stats */
struct vxlan_stats {
	u64			rx_packets;
	u64			rx_bytes;
	u64			tx_packets;
	u64			tx_bytes;
	struct u64_stats_sync	syncp;
};

/* Pseudo network device */
struct vxlan_dev {
	struct hlist_node hlist;
	struct net_device *dev;
	struct vxlan_stats __percpu *stats;
	__u32		  vni;		/* virtual network id */
	__be32	          gaddr;	/* multicast group */
	__be32		  saddr;	/* source address */
@@ -667,7 +657,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
	struct iphdr *oip;
	struct vxlanhdr *vxh;
	struct vxlan_dev *vxlan;
	struct vxlan_stats *stats;
	struct pcpu_tstats *stats;
	__u32 vni;
	int err;

@@ -743,7 +733,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
		}
	}

	stats = this_cpu_ptr(vxlan->stats);
	stats = this_cpu_ptr(vxlan->dev->tstats);
	u64_stats_update_begin(&stats->syncp);
	stats->rx_packets++;
	stats->rx_bytes += skb->len;
@@ -874,28 +864,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
	return false;
}

/* Extract dsfield from inner protocol */
static inline u8 vxlan_get_dsfield(const struct iphdr *iph,
				   const struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return iph->tos;
	else if (skb->protocol == htons(ETH_P_IPV6))
		return ipv6_get_dsfield((const struct ipv6hdr *)iph);
	else
		return 0;
}

/* Propogate ECN bits out */
static inline u8 vxlan_ecn_encap(u8 tos,
				 const struct iphdr *iph,
				 const struct sk_buff *skb)
{
	u8 inner = vxlan_get_dsfield(iph, skb);

	return INET_ECN_encapsulate(tos, inner);
}

static void vxlan_sock_free(struct sk_buff *skb)
{
	sock_put(skb->sk);
@@ -974,8 +942,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,

			/* short-circuited back to local bridge */
			if (netif_rx(skb) == NET_RX_SUCCESS) {
				struct vxlan_stats *stats =
						this_cpu_ptr(vxlan->stats);
				struct pcpu_tstats *stats = this_cpu_ptr(dev->tstats);

				u64_stats_update_begin(&stats->syncp);
				stats->tx_packets++;
@@ -1007,7 +974,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,

	tos = vxlan->tos;
	if (tos == 1)
		tos = vxlan_get_dsfield(old_iph, skb);
		tos = ip_tunnel_get_dsfield(old_iph, skb);

	src_port = vxlan_src_port(vxlan, skb);

@@ -1058,7 +1025,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
	iph->ihl	= sizeof(struct iphdr) >> 2;
	iph->frag_off	= df;
	iph->protocol	= IPPROTO_UDP;
	iph->tos	= vxlan_ecn_encap(tos, old_iph, skb);
	iph->tos	= ip_tunnel_ecn_encap(tos, old_iph, skb);
	iph->daddr	= dst;
	iph->saddr	= fl4.saddr;
	iph->ttl	= ttl ? : ip4_dst_hoplimit(&rt->dst);
@@ -1183,10 +1150,8 @@ static void vxlan_cleanup(unsigned long arg)
/* Setup stats when device is created */
static int vxlan_init(struct net_device *dev)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);

	vxlan->stats = alloc_percpu(struct vxlan_stats);
	if (!vxlan->stats)
	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	return 0;
@@ -1242,49 +1207,6 @@ static int vxlan_stop(struct net_device *dev)
	return 0;
}

/* Merge per-cpu statistics */
static struct rtnl_link_stats64 *vxlan_stats64(struct net_device *dev,
					       struct rtnl_link_stats64 *stats)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_stats tmp, sum = { 0 };
	unsigned int cpu;

	for_each_possible_cpu(cpu) {
		unsigned int start;
		const struct vxlan_stats *stats
			= per_cpu_ptr(vxlan->stats, cpu);

		do {
			start = u64_stats_fetch_begin_bh(&stats->syncp);
			memcpy(&tmp, stats, sizeof(tmp));
		} while (u64_stats_fetch_retry_bh(&stats->syncp, start));

		sum.tx_bytes   += tmp.tx_bytes;
		sum.tx_packets += tmp.tx_packets;
		sum.rx_bytes   += tmp.rx_bytes;
		sum.rx_packets += tmp.rx_packets;
	}

	stats->tx_bytes   = sum.tx_bytes;
	stats->tx_packets = sum.tx_packets;
	stats->rx_bytes   = sum.rx_bytes;
	stats->rx_packets = sum.rx_packets;

	stats->multicast = dev->stats.multicast;
	stats->rx_length_errors = dev->stats.rx_length_errors;
	stats->rx_frame_errors = dev->stats.rx_frame_errors;
	stats->rx_errors = dev->stats.rx_errors;

	stats->tx_dropped = dev->stats.tx_dropped;
	stats->tx_carrier_errors  = dev->stats.tx_carrier_errors;
	stats->tx_aborted_errors  = dev->stats.tx_aborted_errors;
	stats->collisions  = dev->stats.collisions;
	stats->tx_errors = dev->stats.tx_errors;

	return stats;
}

/* Stub, nothing needs to be done. */
static void vxlan_set_multicast_list(struct net_device *dev)
{
@@ -1295,7 +1217,7 @@ static const struct net_device_ops vxlan_netdev_ops = {
	.ndo_open		= vxlan_open,
	.ndo_stop		= vxlan_stop,
	.ndo_start_xmit		= vxlan_xmit,
	.ndo_get_stats64	= vxlan_stats64,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_set_rx_mode	= vxlan_set_multicast_list,
	.ndo_change_mtu		= eth_change_mtu,
	.ndo_validate_addr	= eth_validate_addr,
@@ -1312,9 +1234,7 @@ static struct device_type vxlan_type = {

static void vxlan_free(struct net_device *dev)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);

	free_percpu(vxlan->stats);
	free_percpu(dev->tstats);
	free_netdev(dev);
}
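
The vxlan_stats64() loop removed earlier in this file's diff is replaced by the exported ip_tunnel_get_stats64(), which reads the same four counters out of the generic per-cpu dev->tstats. Below is a hedged sketch of the aggregation the shared helper presumably performs, modeled directly on the deleted vxlan code; the exact in-tree body (including which dev->stats error fields it copies) may differ.

#include <net/ip_tunnels.h>

struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
						struct rtnl_link_stats64 *tot)
{
	int i;

	for_each_possible_cpu(i) {
		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
		unsigned int start;

		/* snapshot each CPU's counters consistently */
		do {
			start = u64_stats_fetch_begin_bh(&tstats->syncp);
			rx_packets = tstats->rx_packets;
			tx_packets = tstats->tx_packets;
			rx_bytes   = tstats->rx_bytes;
			tx_bytes   = tstats->tx_bytes;
		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));

		tot->rx_packets += rx_packets;
		tot->tx_packets += tx_packets;
		tot->rx_bytes   += rx_bytes;
		tot->tx_bytes   += tx_bytes;
	}

	/* the shared helper also folds in the fixed error counters from
	 * dev->stats, much like the removed vxlan_stats64() did */
	return tot;
}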

include/net/gre.h  +51 −0
@@ -2,6 +2,7 @@
#define __LINUX_GRE_H

#include <linux/skbuff.h>
#include <net/ip_tunnels.h>

#define GREPROTO_CISCO		0
#define GREPROTO_PPTP		1
@@ -12,7 +13,57 @@ struct gre_protocol {
	void (*err_handler)(struct sk_buff *skb, u32 info);
};

struct gre_base_hdr {
	__be16 flags;
	__be16 protocol;
};
#define GRE_HEADER_SECTION 4

int gre_add_protocol(const struct gre_protocol *proto, u8 version);
int gre_del_protocol(const struct gre_protocol *proto, u8 version);

static inline __be16 gre_flags_to_tnl_flags(__be16 flags)
{
	__be16 tflags = 0;

	if (flags & GRE_CSUM)
		tflags |= TUNNEL_CSUM;
	if (flags & GRE_ROUTING)
		tflags |= TUNNEL_ROUTING;
	if (flags & GRE_KEY)
		tflags |= TUNNEL_KEY;
	if (flags & GRE_SEQ)
		tflags |= TUNNEL_SEQ;
	if (flags & GRE_STRICT)
		tflags |= TUNNEL_STRICT;
	if (flags & GRE_REC)
		tflags |= TUNNEL_REC;
	if (flags & GRE_VERSION)
		tflags |= TUNNEL_VERSION;

	return tflags;
}

static inline __be16 tnl_flags_to_gre_flags(__be16 tflags)
{
	__be16 flags = 0;

	if (tflags & TUNNEL_CSUM)
		flags |= GRE_CSUM;
	if (tflags & TUNNEL_ROUTING)
		flags |= GRE_ROUTING;
	if (tflags & TUNNEL_KEY)
		flags |= GRE_KEY;
	if (tflags & TUNNEL_SEQ)
		flags |= GRE_SEQ;
	if (tflags & TUNNEL_STRICT)
		flags |= GRE_STRICT;
	if (tflags & TUNNEL_REC)
		flags |= GRE_REC;
	if (tflags & TUNNEL_VERSION)
		flags |= GRE_VERSION;

	return flags;
}

#endif
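
The two inline converters above translate between the on-the-wire GRE flag bits (GRE_CSUM, GRE_KEY, ...) and the protocol-neutral TUNNEL_* bits defined in ip_tunnels.h. As an illustrative sketch (the function name is made up, and the real GRE receive path also validates lengths, versions and checksums), a parser might use them to fill the generic struct tnl_ptk_info like this:

/* Illustrative only: assumes the GRE header starts at skb->data; a real
 * parser would also pskb_may_pull() the optional words before reading them. */
static int example_parse_gre(struct sk_buff *skb, struct tnl_ptk_info *tpi)
{
	const struct gre_base_hdr *greh;
	const __be32 *options;

	if (!pskb_may_pull(skb, sizeof(*greh)))
		return -EINVAL;

	greh = (const struct gre_base_hdr *)skb->data;
	tpi->flags = gre_flags_to_tnl_flags(greh->flags);
	tpi->proto = greh->protocol;

	options = (const __be32 *)(greh + 1);
	if (greh->flags & GRE_CSUM)
		options++;			/* checksum + reserved word */

	tpi->key = (greh->flags & GRE_KEY) ? *options++ : 0;
	tpi->seq = (greh->flags & GRE_SEQ) ? *options : 0;

	return 0;
}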
include/net/ip6_tunnel.h  +1 −0
@@ -3,6 +3,7 @@

#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <linux/if_tunnel.h>
#include <linux/ip6_tunnel.h>

#define IP6TUNNEL_ERR_TIMEO (30*HZ)
include/net/ipip.h → include/net/ip_tunnels.h  +177 −0
#ifndef __NET_IPIP_H
#define __NET_IPIP_H 1
#ifndef __NET_IP_TUNNELS_H
#define __NET_IP_TUNNELS_H 1

#include <linux/if_tunnel.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/types.h>
#include <linux/u64_stats_sync.h>
#include <net/dsfield.h>
#include <net/gro_cells.h>
#include <net/inet_ecn.h>
#include <net/ip.h>
#include <net/rtnetlink.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

/* Keep error state on tunnel for 30 sec */
#define IPTUNNEL_ERR_TIMEO	(30*HZ)

/* 6rd prefix/relay information */
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel_6rd_parm {
	struct in6_addr		prefix;
	__be32			relay_prefix;
	u16			prefixlen;
	u16			relay_prefixlen;
};
#endif

struct ip_tunnel_prl_entry {
	struct ip_tunnel_prl_entry __rcu *next;
	__be32				addr;
	u16				flags;
	struct rcu_head			rcu_head;
};

struct ip_tunnel {
	struct ip_tunnel __rcu	*next;
	struct hlist_node hash_node;
	struct net_device	*dev;

	int		err_count;	/* Number of arrived ICMP errors */
	unsigned long		err_time;	/* Time when the last ICMP error arrived */
	unsigned long	err_time;	/* Time when the last ICMP error
					 * arrived */

	/* These four fields used only by GRE */
	__u32		i_seqno;	/* The last seen seqno	*/
	__u32		o_seqno;	/* The last output seqno */
	int			hlen;		/* Precalculated GRE header length */
	int		hlen;		/* Precalculated header length */
	int		mlink;

	struct ip_tunnel_parm parms;
@@ -37,17 +61,100 @@ struct ip_tunnel {
#endif
	struct ip_tunnel_prl_entry __rcu *prl;	/* potential router list */
	unsigned int		prl_count;	/* # of entries in PRL */

	int			ip_tnl_net_id;
	struct gro_cells	gro_cells;
};

struct ip_tunnel_prl_entry {
	struct ip_tunnel_prl_entry __rcu *next;
	__be32				addr;
	u16				flags;
	struct rcu_head			rcu_head;
#define TUNNEL_CSUM	__cpu_to_be16(0x01)
#define TUNNEL_ROUTING	__cpu_to_be16(0x02)
#define TUNNEL_KEY	__cpu_to_be16(0x04)
#define TUNNEL_SEQ	__cpu_to_be16(0x08)
#define TUNNEL_STRICT	__cpu_to_be16(0x10)
#define TUNNEL_REC	__cpu_to_be16(0x20)
#define TUNNEL_VERSION	__cpu_to_be16(0x40)
#define TUNNEL_NO_KEY	__cpu_to_be16(0x80)

struct tnl_ptk_info {
	__be16 flags;
	__be16 proto;
	__be32 key;
	__be32 seq;
};

#define PACKET_RCVD	0
#define PACKET_REJECT	1

#define IP_TNL_HASH_BITS   10
#define IP_TNL_HASH_SIZE   (1 << IP_TNL_HASH_BITS)

struct ip_tunnel_net {
	struct hlist_head *tunnels;
	struct net_device *fb_tunnel_dev;
};

int ip_tunnel_init(struct net_device *dev);
void ip_tunnel_uninit(struct net_device *dev);
void  ip_tunnel_dellink(struct net_device *dev, struct list_head *head);
int __net_init ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
				  struct rtnl_link_ops *ops, char *devname);

void __net_exit ip_tunnel_delete_net(struct ip_tunnel_net *itn);

void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params);
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);

struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
						struct rtnl_link_stats64 *tot);
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key);

int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error);
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p);
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p);
void ip_tunnel_setup(struct net_device *dev, int net_id);

/* Extract dsfield from inner protocol */
static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph,
				       const struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return iph->tos;
	else if (skb->protocol == htons(ETH_P_IPV6))
		return ipv6_get_dsfield((const struct ipv6hdr *)iph);
	else
		return 0;
}

/* Propogate ECN bits out */
static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph,
				     const struct sk_buff *skb)
{
	u8 inner = ip_tunnel_get_dsfield(iph, skb);

	return INET_ECN_encapsulate(tos, inner);
}

static inline void tunnel_ip_select_ident(struct sk_buff *skb,
					  const struct iphdr  *old_iph,
					  struct dst_entry *dst)
{
	struct iphdr *iph = ip_hdr(skb);

	/* Use inner packet iph-id if possible. */
	if (skb->protocol == htons(ETH_P_IP) && old_iph->id)
		iph->id	= old_iph->id;
	else
		__ip_select_ident(iph, dst,
				  (skb_shinfo(skb)->gso_segs ?: 1) - 1);
}

static inline void iptunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	int err;
@@ -67,18 +174,4 @@ static inline void iptunnel_xmit(struct sk_buff *skb, struct net_device *dev)
		dev->stats.tx_aborted_errors++;
	}
}

static inline void tunnel_ip_select_ident(struct sk_buff *skb,
					  const struct iphdr  *old_iph,
					  struct dst_entry *dst)
{
	struct iphdr *iph = ip_hdr(skb);

	/* Use inner packet iph-id if possible. */
	if (skb->protocol == htons(ETH_P_IP) && old_iph->id)
		iph->id	= old_iph->id;
	else
		__ip_select_ident(iph, dst,
				  (skb_shinfo(skb)->gso_segs ?: 1) - 1);
}
#endif
#endif /* __NET_IP_TUNNELS_H */
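
Taken together, the declarations above let a protocol module hand most of its netdev plumbing to the shared layer. Below is a hedged sketch of how an IPIP-style driver converted in this series might wire these helpers up; ipip_net_id, ipip_link_ops and the "tunl0" fallback device name follow existing IPIP conventions but are shown here purely as an illustration, and the driver-specific pieces are omitted.

#include <net/ip_tunnels.h>
#include <net/netns/generic.h>

static int ipip_net_id __read_mostly;
static struct rtnl_link_ops ipip_link_ops;	/* rtnl ops, details not shown */

static const struct net_device_ops example_ipip_netdev_ops = {
	.ndo_init	 = ip_tunnel_init,	/* or a driver wrapper around it */
	.ndo_uninit	 = ip_tunnel_uninit,
	/* .ndo_start_xmit and .ndo_do_ioctl stay driver-specific */
	.ndo_change_mtu	 = ip_tunnel_change_mtu,
	.ndo_get_stats64 = ip_tunnel_get_stats64,
};

static int __net_init ipip_init_net(struct net *net)
{
	/* registers the per-namespace hash table and "tunl0" fallback device */
	return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
}

static void __net_exit ipip_exit_net(struct net *net)
{
	struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);

	ip_tunnel_delete_net(itn);
}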