Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 936faf6c authored by David S. Miller's avatar David S. Miller
Browse files


Stephen Hemminger says:

====================
Here is current updates for vxlan in net-next.  It includes Mike's changes
to handle multiple destinations and lots of little cosmetic stuff.

This is a fresh vxlan-next repository which was forked from net-next.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents b1a5a34b ba609e9b
Loading
Loading
Loading
Loading
+354 −208
Original line number Diff line number Diff line
@@ -68,24 +68,26 @@ struct vxlanhdr {

/* UDP port for VXLAN traffic.
 * The IANA assigned port is 4789, but the Linux default is 8472
 * for compatability with early adopters.
 * for compatibility with early adopters.
 */
static unsigned int vxlan_port __read_mostly = 8472;
module_param_named(udp_port, vxlan_port, uint, 0444);
static unsigned short vxlan_port __read_mostly = 8472;
module_param_named(udp_port, vxlan_port, ushort, 0444);
MODULE_PARM_DESC(udp_port, "Destination UDP port");

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static unsigned int vxlan_net_id;
static int vxlan_net_id;

static const u8 all_zeros_mac[ETH_ALEN];

/* per UDP socket information */
struct vxlan_sock {
	struct hlist_node hlist;
	struct rcu_head	  rcu;
	struct work_struct del_work;
	unsigned int	  refcnt;
	atomic_t	  refcnt;
	struct socket	  *sock;
	struct hlist_head vni_list[VNI_HASH_SIZE];
};
@@ -94,6 +96,7 @@ struct vxlan_sock {
struct vxlan_net {
	struct list_head  vxlan_list;
	struct hlist_head sock_list[PORT_HASH_SIZE];
	spinlock_t	  sock_lock;
};

struct vxlan_rdst {
@@ -101,7 +104,8 @@ struct vxlan_rdst {
	__be16			 remote_port;
	u32			 remote_vni;
	u32			 remote_ifindex;
	struct vxlan_rdst	*remote_next;
	struct list_head	 list;
	struct rcu_head		 rcu;
};

/* Forwarding table entry */
@@ -110,7 +114,7 @@ struct vxlan_fdb {
	struct rcu_head	  rcu;
	unsigned long	  updated;	/* jiffies */
	unsigned long	  used;
	struct vxlan_rdst remote;
	struct list_head  remotes;
	u16		  state;	/* see ndm_state */
	u8		  flags;	/* see ndm_flags */
	u8		  eth_addr[ETH_ALEN];
@@ -131,6 +135,9 @@ struct vxlan_dev {
	__u8		  ttl;
	u32		  flags;	/* VXLAN_F_* below */

	struct work_struct sock_work;
	struct work_struct igmp_work;

	unsigned long	  age_interval;
	struct timer_list age_timer;
	spinlock_t	  hash_lock;
@@ -148,6 +155,9 @@ struct vxlan_dev {

/* salt for hash table */
static u32 vxlan_salt __read_mostly;
static struct workqueue_struct *vxlan_wq;

static void vxlan_sock_work(struct work_struct *work);

/* Virtual Network hash table head */
static inline struct hlist_head *vni_head(struct vxlan_sock *vs, u32 id)
@@ -163,6 +173,14 @@ static inline struct hlist_head *vs_head(struct net *net, __be16 port)
	return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
}

/* First remote destination for a forwarding entry.
 * Guaranteed to be non-NULL because remotes are never deleted.
 */
static inline struct vxlan_rdst *first_remote(struct vxlan_fdb *fdb)
{
	return list_first_or_null_rcu(&fdb->remotes, struct vxlan_rdst, list);
}

/* Find VXLAN socket based on network namespace and UDP port */
static struct vxlan_sock *vxlan_find_port(struct net *net, __be16 port)
{
@@ -235,7 +253,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
	    nla_put_be16(skb, NDA_PORT, rdst->remote_port))
		goto nla_put_failure;
	if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
	    nla_put_be32(skb, NDA_VNI, rdst->remote_vni))
	    nla_put_u32(skb, NDA_VNI, rdst->remote_vni))
		goto nla_put_failure;
	if (rdst->remote_ifindex &&
	    nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
@@ -268,7 +286,7 @@ static inline size_t vxlan_nlmsg_size(void)
}

static void vxlan_fdb_notify(struct vxlan_dev *vxlan,
			     const struct vxlan_fdb *fdb, int type)
			     struct vxlan_fdb *fdb, int type)
{
	struct net *net = dev_net(vxlan->dev);
	struct sk_buff *skb;
@@ -278,7 +296,7 @@ static void vxlan_fdb_notify(struct vxlan_dev *vxlan,
	if (skb == NULL)
		goto errout;

	err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0, &fdb->remote);
	err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0, first_remote(fdb));
	if (err < 0) {
		/* -EMSGSIZE implies BUG in vxlan_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
@@ -296,22 +314,27 @@ static void vxlan_fdb_notify(struct vxlan_dev *vxlan,
static void vxlan_ip_miss(struct net_device *dev, __be32 ipa)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_fdb f;
	struct vxlan_fdb f = {
		.state = NUD_STALE,
	};
	struct vxlan_rdst remote = {
		.remote_ip = ipa, /* goes to NDA_DST */
		.remote_vni = VXLAN_N_VID,
	};

	memset(&f, 0, sizeof f);
	f.state = NUD_STALE;
	f.remote.remote_ip = ipa; /* goes to NDA_DST */
	f.remote.remote_vni = VXLAN_N_VID;
	INIT_LIST_HEAD(&f.remotes);
	list_add_rcu(&remote.list, &f.remotes);

	vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH);
}

static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN])
{
	struct vxlan_fdb	f;
	struct vxlan_fdb f = {
		.state = NUD_STALE,
	};

	memset(&f, 0, sizeof f);
	f.state = NUD_STALE;
	INIT_LIST_HEAD(&f.remotes);
	memcpy(f.eth_addr, eth_addr, ETH_ALEN);

	vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH);
@@ -366,21 +389,34 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
	return f;
}

/* Add/update destinations for multicast */
static int vxlan_fdb_append(struct vxlan_fdb *f,
			    __be32 ip, __be16 port, __u32 vni, __u32 ifindex)
/* caller should hold vxlan->hash_lock */
static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f,
					      __be32 ip, __be16 port,
					      __u32 vni, __u32 ifindex)
{
	struct vxlan_rdst *rd_prev, *rd;
	struct vxlan_rdst *rd;

	rd_prev = NULL;
	for (rd = &f->remote; rd; rd = rd->remote_next) {
	list_for_each_entry(rd, &f->remotes, list) {
		if (rd->remote_ip == ip &&
		    rd->remote_port == port &&
		    rd->remote_vni == vni &&
		    rd->remote_ifindex == ifindex)
			return 0;
		rd_prev = rd;
			return rd;
	}

	return NULL;
}

/* Add/update destinations for multicast */
static int vxlan_fdb_append(struct vxlan_fdb *f,
			    __be32 ip, __be16 port, __u32 vni, __u32 ifindex)
{
	struct vxlan_rdst *rd;

	rd = vxlan_fdb_find_rdst(f, ip, port, vni, ifindex);
	if (rd)
		return 0;

	rd = kmalloc(sizeof(*rd), GFP_ATOMIC);
	if (rd == NULL)
		return -ENOBUFS;
@@ -388,8 +424,9 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
	rd->remote_port = port;
	rd->remote_vni = vni;
	rd->remote_ifindex = ifindex;
	rd->remote_next = NULL;
	rd_prev->remote_next = rd;

	list_add_tail_rcu(&rd->list, &f->remotes);

	return 1;
}

@@ -421,7 +458,8 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
			notify = 1;
		}
		if ((flags & NLM_F_APPEND) &&
		    is_multicast_ether_addr(f->eth_addr)) {
		    (is_multicast_ether_addr(f->eth_addr) ||
		     is_zero_ether_addr(f->eth_addr))) {
			int rc = vxlan_fdb_append(f, ip, port, vni, ifindex);

			if (rc < 0)
@@ -441,16 +479,14 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
			return -ENOMEM;

		notify = 1;
		f->remote.remote_ip = ip;
		f->remote.remote_port = port;
		f->remote.remote_vni = vni;
		f->remote.remote_ifindex = ifindex;
		f->remote.remote_next = NULL;
		f->state = state;
		f->flags = ndm_flags;
		f->updated = f->used = jiffies;
		INIT_LIST_HEAD(&f->remotes);
		memcpy(f->eth_addr, mac, ETH_ALEN);

		vxlan_fdb_append(f, ip, port, vni, ifindex);

		++vxlan->addrcnt;
		hlist_add_head_rcu(&f->hlist,
				   vxlan_fdb_head(vxlan, mac));
@@ -462,16 +498,19 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
	return 0;
}

static void vxlan_fdb_free_rdst(struct rcu_head *head)
{
	struct vxlan_rdst *rd = container_of(head, struct vxlan_rdst, rcu);
	kfree(rd);
}

static void vxlan_fdb_free(struct rcu_head *head)
{
	struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu);
	struct vxlan_rdst *rd, *nd;

	while (f->remote.remote_next) {
		struct vxlan_rdst *rd = f->remote.remote_next;

		f->remote.remote_next = rd->remote_next;
	list_for_each_entry_safe(rd, nd, &f->remotes, list)
		kfree(rd);
	}
	kfree(f);
}

@@ -487,58 +526,77 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)
	call_rcu(&f->rcu, vxlan_fdb_free);
}

/* Add static entry (via netlink) */
static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
			 struct net_device *dev,
			 const unsigned char *addr, u16 flags)
static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
			   __be32 *ip, __be16 *port, u32 *vni, u32 *ifindex)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct net *net = dev_net(vxlan->dev);
	__be32 ip;
	__be16 port;
	u32 vni, ifindex;
	int err;

	if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) {
		pr_info("RTM_NEWNEIGH with invalid state %#x\n",
			ndm->ndm_state);
		return -EINVAL;
	}

	if (tb[NDA_DST] == NULL)
		return -EINVAL;

	if (tb[NDA_DST]) {
		if (nla_len(tb[NDA_DST]) != sizeof(__be32))
			return -EAFNOSUPPORT;

	ip = nla_get_be32(tb[NDA_DST]);
		*ip = nla_get_be32(tb[NDA_DST]);
	} else {
		*ip = htonl(INADDR_ANY);
	}

	if (tb[NDA_PORT]) {
		if (nla_len(tb[NDA_PORT]) != sizeof(__be16))
			return -EINVAL;
		port = nla_get_be16(tb[NDA_PORT]);
	} else
		port = vxlan->dst_port;
		*port = nla_get_be16(tb[NDA_PORT]);
	} else {
		*port = vxlan->dst_port;
	}

	if (tb[NDA_VNI]) {
		if (nla_len(tb[NDA_VNI]) != sizeof(u32))
			return -EINVAL;
		vni = nla_get_u32(tb[NDA_VNI]);
	} else
		vni = vxlan->default_dst.remote_vni;
		*vni = nla_get_u32(tb[NDA_VNI]);
	} else {
		*vni = vxlan->default_dst.remote_vni;
	}

	if (tb[NDA_IFINDEX]) {
		struct net_device *tdev;

		if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32))
			return -EINVAL;
		ifindex = nla_get_u32(tb[NDA_IFINDEX]);
		tdev = dev_get_by_index(net, ifindex);
		*ifindex = nla_get_u32(tb[NDA_IFINDEX]);
		tdev = dev_get_by_index(net, *ifindex);
		if (!tdev)
			return -EADDRNOTAVAIL;
		dev_put(tdev);
	} else
		ifindex = 0;
	} else {
		*ifindex = 0;
	}

	return 0;
}

/* Add static entry (via netlink) */
static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
			 struct net_device *dev,
			 const unsigned char *addr, u16 flags)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	/* struct net *net = dev_net(vxlan->dev); */
	__be32 ip;
	__be16 port;
	u32 vni, ifindex;
	int err;

	if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) {
		pr_info("RTM_NEWNEIGH with invalid state %#x\n",
			ndm->ndm_state);
		return -EINVAL;
	}

	if (tb[NDA_DST] == NULL)
		return -EINVAL;

	err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex);
	if (err)
		return err;

	spin_lock_bh(&vxlan->hash_lock);
	err = vxlan_fdb_create(vxlan, addr, ip, ndm->ndm_state, flags,
@@ -555,14 +613,43 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_fdb *f;
	int err = -ENOENT;
	struct vxlan_rdst *rd = NULL;
	__be32 ip;
	__be16 port;
	u32 vni, ifindex;
	int err;

	err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex);
	if (err)
		return err;

	err = -ENOENT;

	spin_lock_bh(&vxlan->hash_lock);
	f = vxlan_find_mac(vxlan, addr);
	if (f) {
		vxlan_fdb_destroy(vxlan, f);
	if (!f)
		goto out;

	if (ip != htonl(INADDR_ANY)) {
		rd = vxlan_fdb_find_rdst(f, ip, port, vni, ifindex);
		if (!rd)
			goto out;
	}

	err = 0;

	/* remove a destination if it's not the only one on the list,
	 * otherwise destroy the fdb entry
	 */
	if (rd && !list_is_singular(&f->remotes)) {
		list_del_rcu(&rd->list);
		call_rcu(&rd->rcu, vxlan_fdb_free_rdst);
		goto out;
	}

	vxlan_fdb_destroy(vxlan, f);

out:
	spin_unlock_bh(&vxlan->hash_lock);

	return err;
@@ -581,23 +668,24 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,

		hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) {
			struct vxlan_rdst *rd;
			for (rd = &f->remote; rd; rd = rd->remote_next) {

			if (idx < cb->args[0])
				goto skip;

			list_for_each_entry_rcu(rd, &f->remotes, list) {
				err = vxlan_fdb_info(skb, vxlan, f,
						     NETLINK_CB(cb->skb).portid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGH,
						     NLM_F_MULTI, rd);
				if (err < 0)
					break;
					goto out;
			}
skip:
			++idx;
		}
	}
	}

out:
	return idx;
}

@@ -613,7 +701,9 @@ static bool vxlan_snoop(struct net_device *dev,

	f = vxlan_find_mac(vxlan, src_mac);
	if (likely(f)) {
		if (likely(f->remote.remote_ip == src_ip))
		struct vxlan_rdst *rdst = first_remote(f);

		if (likely(rdst->remote_ip == src_ip))
			return false;

		/* Don't migrate static entries, drop packets */
@@ -623,10 +713,11 @@ static bool vxlan_snoop(struct net_device *dev,
		if (net_ratelimit())
			netdev_info(dev,
				    "%pM migrated from %pI4 to %pI4\n",
				    src_mac, &f->remote.remote_ip, &src_ip);
				    src_mac, &rdst->remote_ip, &src_ip);

		f->remote.remote_ip = src_ip;
		rdst->remote_ip = src_ip;
		f->updated = jiffies;
		vxlan_fdb_notify(vxlan, f, RTM_NEWNEIGH);
	} else {
		/* learned new entry */
		spin_lock(&vxlan->hash_lock);
@@ -647,76 +738,61 @@ static bool vxlan_snoop(struct net_device *dev,


/* See if multicast group is already in use by other ID */
static bool vxlan_group_used(struct vxlan_net *vn,
			     const struct vxlan_dev *this)
static bool vxlan_group_used(struct vxlan_net *vn, __be32 remote_ip)
{
	struct vxlan_dev *vxlan;

	list_for_each_entry(vxlan, &vn->vxlan_list, next) {
		if (vxlan == this)
			continue;

		if (!netif_running(vxlan->dev))
			continue;

		if (vxlan->default_dst.remote_ip == this->default_dst.remote_ip)
		if (vxlan->default_dst.remote_ip == remote_ip)
			return true;
	}

	return false;
}

/* kernel equivalent to IP_ADD_MEMBERSHIP */
static int vxlan_join_group(struct net_device *dev)
static void vxlan_sock_hold(struct vxlan_sock *vs)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
	struct sock *sk = vxlan->vn_sock->sock->sk;
	struct ip_mreqn mreq = {
		.imr_multiaddr.s_addr	= vxlan->default_dst.remote_ip,
		.imr_ifindex		= vxlan->default_dst.remote_ifindex,
	};
	int err;
	atomic_inc(&vs->refcnt);
}

	/* Already a member of group */
	if (vxlan_group_used(vn, vxlan))
		return 0;
static void vxlan_sock_release(struct vxlan_net *vn, struct vxlan_sock *vs)
{
	if (!atomic_dec_and_test(&vs->refcnt))
		return;

	/* Need to drop RTNL to call multicast join */
	rtnl_unlock();
	lock_sock(sk);
	err = ip_mc_join_group(sk, &mreq);
	release_sock(sk);
	rtnl_lock();
	spin_lock(&vn->sock_lock);
	hlist_del_rcu(&vs->hlist);
	spin_unlock(&vn->sock_lock);

	return err;
	queue_work(vxlan_wq, &vs->del_work);
}


/* kernel equivalent to IP_DROP_MEMBERSHIP */
static int vxlan_leave_group(struct net_device *dev)
/* Callback to update multicast group membership.
 * Scheduled when vxlan goes up/down.
 */
static void vxlan_igmp_work(struct work_struct *work)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
	int err = 0;
	struct sock *sk = vxlan->vn_sock->sock->sk;
	struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_work);
	struct vxlan_net *vn = net_generic(dev_net(vxlan->dev), vxlan_net_id);
	struct vxlan_sock *vs = vxlan->vn_sock;
	struct sock *sk = vs->sock->sk;
	struct ip_mreqn mreq = {
		.imr_multiaddr.s_addr	= vxlan->default_dst.remote_ip,
		.imr_ifindex		= vxlan->default_dst.remote_ifindex,
	};

	/* Only leave group when last vxlan is done. */
	if (vxlan_group_used(vn, vxlan))
		return 0;

	/* Need to drop RTNL to call multicast leave */
	rtnl_unlock();
	lock_sock(sk);
	err = ip_mc_leave_group(sk, &mreq);
	if (vxlan_group_used(vn, vxlan->default_dst.remote_ip))
		ip_mc_join_group(sk, &mreq);
	else
		ip_mc_leave_group(sk, &mreq);
	release_sock(sk);
	rtnl_lock();

	return err;
	vxlan_sock_release(vn, vs);
	dev_put(vxlan->dev);
}

/* Callback from net/ipv4/udp.c to receive packets */
@@ -873,7 +949,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
		}

		f = vxlan_find_mac(vxlan, n->ha);
		if (f && f->remote.remote_ip == htonl(INADDR_ANY)) {
		if (f && first_remote(f)->remote_ip == htonl(INADDR_ANY)) {
			/* bridge-local neighbor */
			neigh_release(n);
			goto out;
@@ -1015,7 +1091,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
	}
}

static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
			   struct vxlan_rdst *rdst, bool did_rsc)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
@@ -1039,7 +1115,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
		if (did_rsc) {
			/* short-circuited back to local bridge */
			vxlan_encap_bypass(skb, vxlan, vxlan);
			return NETDEV_TX_OK;
			return;
		}
		goto drop;
	}
@@ -1095,7 +1171,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
		if (!dst_vxlan)
			goto tx_error;
		vxlan_encap_bypass(skb, vxlan, dst_vxlan);
		return NETDEV_TX_OK;
		return;
	}
	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
	vxh->vx_flags = htonl(VXLAN_FLAGS);
@@ -1123,7 +1199,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
			    IPPROTO_UDP, tos, ttl, df);
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return NETDEV_TX_OK;
	return;

drop:
	dev->stats.tx_dropped++;
@@ -1133,7 +1209,6 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
	dev->stats.tx_errors++;
tx_free:
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

/* Transmit local packets over Vxlan
@@ -1147,9 +1222,8 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct ethhdr *eth;
	bool did_rsc = false;
	struct vxlan_rdst *rdst0, *rdst;
	struct vxlan_rdst *rdst;
	struct vxlan_fdb *f;
	int rc1, rc;

	skb_reset_mac_header(skb);
	eth = eth_hdr(skb);
@@ -1168,33 +1242,28 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
	}

	if (f == NULL) {
		rdst0 = &vxlan->default_dst;

		if (rdst0->remote_ip == htonl(INADDR_ANY) &&
		    (vxlan->flags & VXLAN_F_L2MISS) &&
		f = vxlan_find_mac(vxlan, all_zeros_mac);
		if (f == NULL) {
			if ((vxlan->flags & VXLAN_F_L2MISS) &&
			    !is_multicast_ether_addr(eth->h_dest))
				vxlan_fdb_miss(vxlan, eth->h_dest);
	} else
		rdst0 = &f->remote;

	rc = NETDEV_TX_OK;
			dev->stats.tx_dropped++;
			dev_kfree_skb(skb);
			return NETDEV_TX_OK;
		}
	}

	/* if there are multiple destinations, send copies */
	for (rdst = rdst0->remote_next; rdst; rdst = rdst->remote_next) {
	list_for_each_entry_rcu(rdst, &f->remotes, list) {
		struct sk_buff *skb1;

		skb1 = skb_clone(skb, GFP_ATOMIC);
		if (skb1) {
			rc1 = vxlan_xmit_one(skb1, dev, rdst, did_rsc);
			if (rc == NETDEV_TX_OK)
				rc = rc1;
		}
		if (skb1)
			vxlan_xmit_one(skb1, dev, rdst, did_rsc);
	}

	rc1 = vxlan_xmit_one(skb, dev, rdst0, did_rsc);
	if (rc == NETDEV_TX_OK)
		rc = rc1;
	return rc;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

/* Walk the forwarding table and purge stale entries */
@@ -1237,23 +1306,70 @@ static void vxlan_cleanup(unsigned long arg)
/* Setup stats when device is created */
static int vxlan_init(struct net_device *dev)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
	struct vxlan_sock *vs;
	__u32 vni = vxlan->default_dst.remote_vni;

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	spin_lock(&vn->sock_lock);
	vs = vxlan_find_port(dev_net(dev), vxlan->dst_port);
	if (vs) {
		/* If we have a socket with same port already, reuse it */
		atomic_inc(&vs->refcnt);
		vxlan->vn_sock = vs;
		hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
	} else {
		/* otherwise make new socket outside of RTNL */
		dev_hold(dev);
		queue_work(vxlan_wq, &vxlan->sock_work);
	}
	spin_unlock(&vn->sock_lock);

	return 0;
}

static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan)
{
	struct vxlan_fdb *f;

	spin_lock_bh(&vxlan->hash_lock);
	f = __vxlan_find_mac(vxlan, all_zeros_mac);
	if (f)
		vxlan_fdb_destroy(vxlan, f);
	spin_unlock_bh(&vxlan->hash_lock);
}

static void vxlan_uninit(struct net_device *dev)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
	struct vxlan_sock *vs = vxlan->vn_sock;

	vxlan_fdb_delete_default(vxlan);

	if (vs)
		vxlan_sock_release(vn, vs);
	free_percpu(dev->tstats);
}

/* Start ageing timer and join group when device is brought up */
static int vxlan_open(struct net_device *dev)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	int err;
	struct vxlan_sock *vs = vxlan->vn_sock;

	/* socket hasn't been created */
	if (!vs)
		return -ENOTCONN;

	if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) {
		err = vxlan_join_group(dev);
		if (err)
			return err;
		vxlan_sock_hold(vs);
		dev_hold(dev);
		queue_work(vxlan_wq, &vxlan->igmp_work);
	}

	if (vxlan->age_interval)
@@ -1273,6 +1389,8 @@ static void vxlan_flush(struct vxlan_dev *vxlan)
		hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
			struct vxlan_fdb *f
				= container_of(p, struct vxlan_fdb, hlist);
			/* the all_zeros_mac entry is deleted at vxlan_uninit */
			if (!is_zero_ether_addr(f->eth_addr))
				vxlan_fdb_destroy(vxlan, f);
		}
	}
@@ -1283,9 +1401,13 @@ static void vxlan_flush(struct vxlan_dev *vxlan)
static int vxlan_stop(struct net_device *dev)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_sock *vs = vxlan->vn_sock;

	if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip)))
		vxlan_leave_group(dev);
	if (vs && IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) {
		vxlan_sock_hold(vs);
		dev_hold(dev);
		queue_work(vxlan_wq, &vxlan->igmp_work);
	}

	del_timer_sync(&vxlan->age_timer);

@@ -1301,6 +1423,7 @@ static void vxlan_set_multicast_list(struct net_device *dev)

static const struct net_device_ops vxlan_netdev_ops = {
	.ndo_init		= vxlan_init,
	.ndo_uninit		= vxlan_uninit,
	.ndo_open		= vxlan_open,
	.ndo_stop		= vxlan_stop,
	.ndo_start_xmit		= vxlan_xmit,
@@ -1319,12 +1442,6 @@ static struct device_type vxlan_type = {
	.name = "vxlan",
};

static void vxlan_free(struct net_device *dev)
{
	free_percpu(dev->tstats);
	free_netdev(dev);
}

/* Initialize the device structure. */
static void vxlan_setup(struct net_device *dev)
{
@@ -1337,7 +1454,7 @@ static void vxlan_setup(struct net_device *dev)
	dev->hard_header_len = ETH_HLEN + VXLAN_HEADROOM;

	dev->netdev_ops = &vxlan_netdev_ops;
	dev->destructor = vxlan_free;
	dev->destructor = free_netdev;
	SET_NETDEV_DEVTYPE(dev, &vxlan_type);

	dev->tx_queue_len = 0;
@@ -1354,6 +1471,8 @@ static void vxlan_setup(struct net_device *dev)

	INIT_LIST_HEAD(&vxlan->next);
	spin_lock_init(&vxlan->hash_lock);
	INIT_WORK(&vxlan->igmp_work, vxlan_igmp_work);
	INIT_WORK(&vxlan->sock_work, vxlan_sock_work);

	init_timer_deferrable(&vxlan->age_timer);
	vxlan->age_timer.function = vxlan_cleanup;
@@ -1445,7 +1564,6 @@ static void vxlan_del_work(struct work_struct *work)
	kfree_rcu(vs, rcu);
}

/* Create new listen socket if needed */
static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port)
{
	struct vxlan_sock *vs;
@@ -1453,6 +1571,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port)
	struct sockaddr_in vxlan_addr = {
		.sin_family = AF_INET,
		.sin_addr.s_addr = htonl(INADDR_ANY),
		.sin_port = port,
	};
	int rc;
	unsigned int h;
@@ -1478,8 +1597,6 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port)
	sk = vs->sock->sk;
	sk_change_net(sk, net);

	vxlan_addr.sin_port = port;

	rc = kernel_bind(vs->sock, (struct sockaddr *) &vxlan_addr,
			 sizeof(vxlan_addr));
	if (rc < 0) {
@@ -1497,18 +1614,57 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port)
	udp_sk(sk)->encap_type = 1;
	udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv;
	udp_encap_enable();
	atomic_set(&vs->refcnt, 1);

	vs->refcnt = 1;
	return vs;
}

/* Scheduled at device creation to bind to a socket */
static void vxlan_sock_work(struct work_struct *work)
{
	struct vxlan_dev *vxlan
		= container_of(work, struct vxlan_dev, sock_work);
	struct net_device *dev = vxlan->dev;
	struct net *net = dev_net(dev);
	__u32 vni = vxlan->default_dst.remote_vni;
	__be16 port = vxlan->dst_port;
	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
	struct vxlan_sock *nvs, *ovs;

	nvs = vxlan_socket_create(net, port);
	if (IS_ERR(nvs)) {
		netdev_err(vxlan->dev, "Can not create UDP socket, %ld\n",
			   PTR_ERR(nvs));
		goto out;
	}

	spin_lock(&vn->sock_lock);
	/* Look again to see if can reuse socket */
	ovs = vxlan_find_port(net, port);
	if (ovs) {
		atomic_inc(&ovs->refcnt);
		vxlan->vn_sock = ovs;
		hlist_add_head_rcu(&vxlan->hlist, vni_head(ovs, vni));
		spin_unlock(&vn->sock_lock);

		sk_release_kernel(nvs->sock->sk);
		kfree(nvs);
	} else {
		vxlan->vn_sock = nvs;
		hlist_add_head_rcu(&nvs->hlist, vs_head(net, port));
		hlist_add_head_rcu(&vxlan->hlist, vni_head(nvs, vni));
		spin_unlock(&vn->sock_lock);
	}
out:
	dev_put(dev);
}

static int vxlan_newlink(struct net *net, struct net_device *dev,
			 struct nlattr *tb[], struct nlattr *data[])
{
	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_rdst *dst = &vxlan->default_dst;
	struct vxlan_sock *vs;
	__u32 vni;
	int err;

@@ -1586,36 +1742,25 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
		return -EEXIST;
	}

	vs = vxlan_find_port(net, vxlan->dst_port);
	if (vs)
		++vs->refcnt;
	else {
		/* Drop lock because socket create acquires RTNL lock */
		rtnl_unlock();
		vs = vxlan_socket_create(net, vxlan->dst_port);
		rtnl_lock();
		if (IS_ERR(vs))
			return PTR_ERR(vs);

		hlist_add_head_rcu(&vs->hlist, vs_head(net, vxlan->dst_port));
	}
	vxlan->vn_sock = vs;

	SET_ETHTOOL_OPS(dev, &vxlan_ethtool_ops);

	/* create an fdb entry for default destination */
	err = vxlan_fdb_create(vxlan, all_zeros_mac,
			       vxlan->default_dst.remote_ip,
			       NUD_REACHABLE|NUD_PERMANENT,
			       NLM_F_EXCL|NLM_F_CREATE,
			       vxlan->dst_port, vxlan->default_dst.remote_vni,
			       vxlan->default_dst.remote_ifindex, NTF_SELF);
	if (err)
		return err;

	err = register_netdevice(dev);
	if (err) {
		if (--vs->refcnt == 0) {
			rtnl_unlock();
			sk_release_kernel(vs->sock->sk);
			kfree(vs);
			rtnl_lock();
		}
		vxlan_fdb_delete_default(vxlan);
		return err;
	}

	list_add(&vxlan->next, &vn->vxlan_list);
	hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));

	return 0;
}
@@ -1623,16 +1768,10 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
static void vxlan_dellink(struct net_device *dev, struct list_head *head)
{
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_sock *vs = vxlan->vn_sock;

	hlist_del_rcu(&vxlan->hlist);
	list_del(&vxlan->next);
	unregister_netdevice_queue(dev, head);

	if (--vs->refcnt == 0) {
		hlist_del_rcu(&vs->hlist);
		schedule_work(&vs->del_work);
	}
}

static size_t vxlan_get_size(const struct net_device *dev)
@@ -1721,6 +1860,7 @@ static __net_init int vxlan_init_net(struct net *net)
	unsigned int h;

	INIT_LIST_HEAD(&vn->vxlan_list);
	spin_lock_init(&vn->sock_lock);

	for (h = 0; h < PORT_HASH_SIZE; ++h)
		INIT_HLIST_HEAD(&vn->sock_list[h]);
@@ -1750,6 +1890,10 @@ static int __init vxlan_init_module(void)
{
	int rc;

	vxlan_wq = alloc_workqueue("vxlan", 0, 0);
	if (!vxlan_wq)
		return -ENOMEM;

	get_random_bytes(&vxlan_salt, sizeof(vxlan_salt));

	rc = register_pernet_device(&vxlan_net_ops);
@@ -1765,14 +1909,16 @@ static int __init vxlan_init_module(void)
out2:
	unregister_pernet_device(&vxlan_net_ops);
out1:
	destroy_workqueue(vxlan_wq);
	return rc;
}
late_initcall(vxlan_init_module);

static void __exit vxlan_cleanup_module(void)
{
	rtnl_link_unregister(&vxlan_link_ops);
	unregister_pernet_device(&vxlan_net_ops);
	rtnl_link_unregister(&vxlan_link_ops);
	destroy_workqueue(vxlan_wq);
	rcu_barrier();
}
module_exit(vxlan_cleanup_module);
+5 −0
Original line number Diff line number Diff line
@@ -707,6 +707,11 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
		}
	}

	if (is_zero_ether_addr(addr)) {
		pr_info("bridge: RTM_NEWNEIGH with invalid ether address\n");
		return -EINVAL;
	}

	p = br_port_get_rtnl(dev);
	if (p == NULL) {
		pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n",
+0 −8

File changed.

Preview size limit exceeded, changes collapsed.