Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a2f1483b authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'vxlan-fix-default-fdb-entry-user-space-notify-ordering-race'



Roopa Prabhu says:

====================
vxlan: fix default fdb entry user-space notify ordering/race

Problem:
In vxlan_newlink, a default fdb entry is added before register_netdev.
The default fdb creation function notifies user-space of the
fdb entry on the vxlan device which user-space does not know about yet.
(RTM_NEWNEIGH goes before RTM_NEWLINK for the same ifindex).

This series fixes the user-space netlink notification ordering issue
with the following changes:
- decouple fdb notify from fdb create.
- Move fdb notify after register_netdev.
- modify rtnl_configure_link to allow configuring a link early.
- Call rtnl_configure_link in vxlan newlink handler to notify
userspace about the newlink before fdb notify and
hence avoiding the user-space race.
====================

Fixes: afbd8bae ("vxlan: add implicit fdb entry for default destination")
Signed-off-by: default avatarRoopa Prabhu <roopa@cumulusnetworks.com>
parents 22dd1491 0241b836
Loading
Loading
Loading
Loading
+87 −39
Original line number Diff line number Diff line
@@ -637,8 +637,61 @@ static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
	return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
}

/* Add new entry to forwarding table -- assumes lock held */
static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan,
					 const u8 *mac, __u16 state,
					 __be32 src_vni, __u8 ndm_flags)
{
	struct vxlan_fdb *f;

	f = kmalloc(sizeof(*f), GFP_ATOMIC);
	if (!f)
		return NULL;
	f->state = state;
	f->flags = ndm_flags;
	f->updated = f->used = jiffies;
	f->vni = src_vni;
	INIT_LIST_HEAD(&f->remotes);
	memcpy(f->eth_addr, mac, ETH_ALEN);

	return f;
}

static int vxlan_fdb_create(struct vxlan_dev *vxlan,
			    const u8 *mac, union vxlan_addr *ip,
			    __u16 state, __be16 port, __be32 src_vni,
			    __be32 vni, __u32 ifindex, __u8 ndm_flags,
			    struct vxlan_fdb **fdb)
{
	struct vxlan_rdst *rd = NULL;
	struct vxlan_fdb *f;
	int rc;

	if (vxlan->cfg.addrmax &&
	    vxlan->addrcnt >= vxlan->cfg.addrmax)
		return -ENOSPC;

	netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
	f = vxlan_fdb_alloc(vxlan, mac, state, src_vni, ndm_flags);
	if (!f)
		return -ENOMEM;

	rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
	if (rc < 0) {
		kfree(f);
		return rc;
	}

	++vxlan->addrcnt;
	hlist_add_head_rcu(&f->hlist,
			   vxlan_fdb_head(vxlan, mac, src_vni));

	*fdb = f;

	return 0;
}

/* Add new entry to forwarding table -- assumes lock held */
static int vxlan_fdb_update(struct vxlan_dev *vxlan,
			    const u8 *mac, union vxlan_addr *ip,
			    __u16 state, __u16 flags,
			    __be16 port, __be32 src_vni, __be32 vni,
@@ -688,37 +741,17 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
		if (!(flags & NLM_F_CREATE))
			return -ENOENT;

		if (vxlan->cfg.addrmax &&
		    vxlan->addrcnt >= vxlan->cfg.addrmax)
			return -ENOSPC;

		/* Disallow replace to add a multicast entry */
		if ((flags & NLM_F_REPLACE) &&
		    (is_multicast_ether_addr(mac) || is_zero_ether_addr(mac)))
			return -EOPNOTSUPP;

		netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
		f = kmalloc(sizeof(*f), GFP_ATOMIC);
		if (!f)
			return -ENOMEM;

		notify = 1;
		f->state = state;
		f->flags = ndm_flags;
		f->updated = f->used = jiffies;
		f->vni = src_vni;
		INIT_LIST_HEAD(&f->remotes);
		memcpy(f->eth_addr, mac, ETH_ALEN);

		rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
		if (rc < 0) {
			kfree(f);
		rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni,
				      vni, ifindex, ndm_flags, &f);
		if (rc < 0)
			return rc;
		}

		++vxlan->addrcnt;
		hlist_add_head_rcu(&f->hlist,
				   vxlan_fdb_head(vxlan, mac, src_vni));
		notify = 1;
	}

	if (notify) {
@@ -742,12 +775,14 @@ static void vxlan_fdb_free(struct rcu_head *head)
	kfree(f);
}

static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)
static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
			      bool do_notify)
{
	netdev_dbg(vxlan->dev,
		    "delete %pM\n", f->eth_addr);

	--vxlan->addrcnt;
	if (do_notify)
		vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH);

	hlist_del_rcu(&f->hlist);
@@ -864,7 +899,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
		return -EAFNOSUPPORT;

	spin_lock_bh(&vxlan->hash_lock);
	err = vxlan_fdb_create(vxlan, addr, &ip, ndm->ndm_state, flags,
	err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags,
			       port, src_vni, vni, ifindex, ndm->ndm_flags);
	spin_unlock_bh(&vxlan->hash_lock);

@@ -898,7 +933,7 @@ static int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
		goto out;
	}

	vxlan_fdb_destroy(vxlan, f);
	vxlan_fdb_destroy(vxlan, f, true);

out:
	return 0;
@@ -1007,7 +1042,7 @@ static bool vxlan_snoop(struct net_device *dev,

		/* close off race between vxlan_flush and incoming packets */
		if (netif_running(dev))
			vxlan_fdb_create(vxlan, src_mac, src_ip,
			vxlan_fdb_update(vxlan, src_mac, src_ip,
					 NUD_REACHABLE,
					 NLM_F_EXCL|NLM_F_CREATE,
					 vxlan->cfg.dst_port,
@@ -2366,7 +2401,7 @@ static void vxlan_cleanup(struct timer_list *t)
					   "garbage collect %pM\n",
					   f->eth_addr);
				f->state = NUD_STALE;
				vxlan_fdb_destroy(vxlan, f);
				vxlan_fdb_destroy(vxlan, f, true);
			} else if (time_before(timeout, next_timer))
				next_timer = timeout;
		}
@@ -2417,7 +2452,7 @@ static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni)
	spin_lock_bh(&vxlan->hash_lock);
	f = __vxlan_find_mac(vxlan, all_zeros_mac, vni);
	if (f)
		vxlan_fdb_destroy(vxlan, f);
		vxlan_fdb_destroy(vxlan, f, true);
	spin_unlock_bh(&vxlan->hash_lock);
}

@@ -2471,7 +2506,7 @@ static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all)
				continue;
			/* the all_zeros_mac entry is deleted at vxlan_uninit */
			if (!is_zero_ether_addr(f->eth_addr))
				vxlan_fdb_destroy(vxlan, f);
				vxlan_fdb_destroy(vxlan, f, true);
		}
	}
	spin_unlock_bh(&vxlan->hash_lock);
@@ -3162,6 +3197,7 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
{
	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
	struct vxlan_dev *vxlan = netdev_priv(dev);
	struct vxlan_fdb *f = NULL;
	int err;

	err = vxlan_dev_configure(net, dev, conf, false, extack);
@@ -3175,24 +3211,35 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
		err = vxlan_fdb_create(vxlan, all_zeros_mac,
				       &vxlan->default_dst.remote_ip,
				       NUD_REACHABLE | NUD_PERMANENT,
				       NLM_F_EXCL | NLM_F_CREATE,
				       vxlan->cfg.dst_port,
				       vxlan->default_dst.remote_vni,
				       vxlan->default_dst.remote_vni,
				       vxlan->default_dst.remote_ifindex,
				       NTF_SELF);
				       NTF_SELF, &f);
		if (err)
			return err;
	}

	err = register_netdevice(dev);
	if (err)
		goto errout;

	err = rtnl_configure_link(dev, NULL);
	if (err) {
		vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni);
		return err;
		unregister_netdevice(dev);
		goto errout;
	}

	/* notify default fdb entry */
	if (f)
		vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH);

	list_add(&vxlan->next, &vn->vxlan_list);
	return 0;
errout:
	if (f)
		vxlan_fdb_destroy(vxlan, f, false);
	return err;
}

static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
@@ -3427,6 +3474,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
	struct vxlan_rdst *dst = &vxlan->default_dst;
	struct vxlan_rdst old_dst;
	struct vxlan_config conf;
	struct vxlan_fdb *f = NULL;
	int err;

	err = vxlan_nl2conf(tb, data,
@@ -3455,16 +3503,16 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
			err = vxlan_fdb_create(vxlan, all_zeros_mac,
					       &dst->remote_ip,
					       NUD_REACHABLE | NUD_PERMANENT,
					       NLM_F_CREATE | NLM_F_APPEND,
					       vxlan->cfg.dst_port,
					       dst->remote_vni,
					       dst->remote_vni,
					       dst->remote_ifindex,
					       NTF_SELF);
					       NTF_SELF, &f);
			if (err) {
				spin_unlock_bh(&vxlan->hash_lock);
				return err;
			}
			vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH);
		}
		spin_unlock_bh(&vxlan->hash_lock);
	}
+6 −3
Original line number Diff line number Diff line
@@ -2759,9 +2759,12 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
			return err;
	}

	if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) {
		__dev_notify_flags(dev, old_flags, 0U);
	} else {
		dev->rtnl_link_state = RTNL_LINK_INITIALIZED;

		__dev_notify_flags(dev, old_flags, ~0U);
	}
	return 0;
}
EXPORT_SYMBOL(rtnl_configure_link);