Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c3f1010b authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'vrf-pktinfo'



David Ahern says:

====================
net: vrf: Fixup PKTINFO to return enslaved device index

Applications such as OSPF and BFD need the original ingress device not
the VRF device; the latter can be derived from the former. To that end
move the packet intercept from an rx handler that is invoked by
__netif_receive_skb_core to the ipv4 and ipv6 receive processing.

IPv6 already saves the skb_iif to the control buffer in ipv6_rcv. Since
the skb->dev has not been switched the cb has the enslaved device. Make
the same happen for IPv4 by adding the skb_iif to inet_skb_parm and set
it in ipv4 code after clearing the skb control buffer similar to IPv6.
From there the pktinfo can just pull it from cb with the PKTINFO_SKB_CB
cast.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents ca4aa976 0b922b7a
Loading
Loading
Loading
Loading
+91 −98
Original line number Diff line number Diff line
@@ -42,9 +42,6 @@
#define DRV_NAME	"vrf"
#define DRV_VERSION	"1.0"

#define vrf_master_get_rcu(dev) \
	((struct net_device *)rcu_dereference(dev->rx_handler_data))

struct net_vrf {
	struct rtable           *rth;
	struct rt6_info		*rt6;
@@ -60,90 +57,12 @@ struct pcpu_dstats {
	struct u64_stats_sync	syncp;
};

/* neighbor handling is done with actual device; do not want
 * to flip skb->dev for those ndisc packets. This really fails
 * for multiple next protocols (e.g., NEXTHDR_HOP). But it is
 * a start.
 */
#if IS_ENABLED(CONFIG_IPV6)
static bool check_ipv6_frame(const struct sk_buff *skb)
{
	const struct ipv6hdr *ipv6h;
	struct ipv6hdr _ipv6h;
	bool rc = true;

	ipv6h = skb_header_pointer(skb, 0, sizeof(_ipv6h), &_ipv6h);
	if (!ipv6h)
		goto out;

	if (ipv6h->nexthdr == NEXTHDR_ICMP) {
		const struct icmp6hdr *icmph;
		struct icmp6hdr _icmph;

		icmph = skb_header_pointer(skb, sizeof(_ipv6h),
					   sizeof(_icmph), &_icmph);
		if (!icmph)
			goto out;

		switch (icmph->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			rc = false;
			break;
		}
	}

out:
	return rc;
}
#else
static bool check_ipv6_frame(const struct sk_buff *skb)
{
	return false;
}
#endif

static bool is_ip_rx_frame(struct sk_buff *skb)
{
	switch (skb->protocol) {
	case htons(ETH_P_IP):
		return true;
	case htons(ETH_P_IPV6):
		return check_ipv6_frame(skb);
	}
	return false;
}

static void vrf_tx_error(struct net_device *vrf_dev, struct sk_buff *skb)
{
	vrf_dev->stats.tx_errors++;
	kfree_skb(skb);
}

/* note: already called with rcu_read_lock */
static rx_handler_result_t vrf_handle_frame(struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;

	if (is_ip_rx_frame(skb)) {
		struct net_device *dev = vrf_master_get_rcu(skb->dev);
		struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats);

		u64_stats_update_begin(&dstats->syncp);
		dstats->rx_pkts++;
		dstats->rx_bytes += skb->len;
		u64_stats_update_end(&dstats->syncp);

		skb->dev = dev;

		return RX_HANDLER_ANOTHER;
	}
	return RX_HANDLER_PASS;
}

static struct rtnl_link_stats64 *vrf_get_stats64(struct net_device *dev,
						 struct rtnl_link_stats64 *stats)
{
@@ -506,28 +425,14 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
{
	int ret;

	/* register the packet handler for slave ports */
	ret = netdev_rx_handler_register(port_dev, vrf_handle_frame, dev);
	if (ret) {
		netdev_err(port_dev,
			   "Device %s failed to register rx_handler\n",
			   port_dev->name);
		goto out_fail;
	}

	ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL);
	if (ret < 0)
		goto out_unregister;
		return ret;

	port_dev->priv_flags |= IFF_L3MDEV_SLAVE;
	cycle_netdev(port_dev);

	return 0;

out_unregister:
	netdev_rx_handler_unregister(port_dev);
out_fail:
	return ret;
}

static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
@@ -544,8 +449,6 @@ static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev)
	netdev_upper_dev_unlink(port_dev, dev);
	port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE;

	netdev_rx_handler_unregister(port_dev);

	cycle_netdev(port_dev);

	return 0;
@@ -669,6 +572,95 @@ static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
	return rc;
}

#if IS_ENABLED(CONFIG_IPV6)
/* neighbor handling is done with actual device; do not want
 * to flip skb->dev for those ndisc packets. This really fails
 * for multiple next protocols (e.g., NEXTHDR_HOP). But it is
 * a start.
 */
static bool ipv6_ndisc_frame(const struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	bool rc = false;

	if (iph->nexthdr == NEXTHDR_ICMP) {
		const struct icmp6hdr *icmph;
		struct icmp6hdr _icmph;

		icmph = skb_header_pointer(skb, sizeof(*iph),
					   sizeof(_icmph), &_icmph);
		if (!icmph)
			goto out;

		switch (icmph->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			rc = true;
			break;
		}
	}

out:
	return rc;
}

static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
				   struct sk_buff *skb)
{
	/* if packet is NDISC keep the ingress interface */
	if (!ipv6_ndisc_frame(skb)) {
		skb->dev = vrf_dev;
		skb->skb_iif = vrf_dev->ifindex;

		skb_push(skb, skb->mac_len);
		dev_queue_xmit_nit(skb, vrf_dev);
		skb_pull(skb, skb->mac_len);

		IP6CB(skb)->flags |= IP6SKB_L3SLAVE;
	}

	return skb;
}

#else
static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
				   struct sk_buff *skb)
{
	return skb;
}
#endif

static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev,
				  struct sk_buff *skb)
{
	skb->dev = vrf_dev;
	skb->skb_iif = vrf_dev->ifindex;

	skb_push(skb, skb->mac_len);
	dev_queue_xmit_nit(skb, vrf_dev);
	skb_pull(skb, skb->mac_len);

	return skb;
}

/* called with rcu lock held */
static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev,
				  struct sk_buff *skb,
				  u16 proto)
{
	switch (proto) {
	case AF_INET:
		return vrf_ip_rcv(vrf_dev, skb);
	case AF_INET6:
		return vrf_ip6_rcv(vrf_dev, skb);
	}

	return skb;
}

#if IS_ENABLED(CONFIG_IPV6)
static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev,
					 const struct flowi6 *fl6)
@@ -690,6 +682,7 @@ static const struct l3mdev_ops vrf_l3mdev_ops = {
	.l3mdev_fib_table	= vrf_fib_table,
	.l3mdev_get_rtable	= vrf_get_rtable,
	.l3mdev_get_saddr	= vrf_get_saddr,
	.l3mdev_l3_rcv		= vrf_l3_rcv,
#if IS_ENABLED(CONFIG_IPV6)
	.l3mdev_get_rt6_dst	= vrf_get_rt6_dst,
#endif
+16 −1
Original line number Diff line number Diff line
@@ -118,14 +118,29 @@ struct inet6_skb_parm {
#define IP6SKB_ROUTERALERT	8
#define IP6SKB_FRAGMENTED      16
#define IP6SKB_HOPBYHOP        32
#define IP6SKB_L3SLAVE         64
};

#if defined(CONFIG_NET_L3_MASTER_DEV)
static inline bool skb_l3mdev_slave(__u16 flags)
{
	return flags & IP6SKB_L3SLAVE;
}
#else
static inline bool skb_l3mdev_slave(__u16 flags)
{
	return false;
}
#endif

#define IP6CB(skb)	((struct inet6_skb_parm*)((skb)->cb))
#define IP6CBMTU(skb)	((struct ip6_mtuinfo *)((skb)->cb))

static inline int inet6_iif(const struct sk_buff *skb)
{
	return IP6CB(skb)->iif;
	bool l3_slave = skb_l3mdev_slave(IP6CB(skb)->flags);

	return l3_slave ? skb->skb_iif : IP6CB(skb)->iif;
}

struct tcp6_request_sock {
+2 −0
Original line number Diff line number Diff line
@@ -3258,6 +3258,8 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
bool is_skb_forwardable(const struct net_device *dev,
			const struct sk_buff *skb);

void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);

extern int		netdev_budget;

/* Called by rtnetlink.c:rtnl_unlock() */
+1 −0
Original line number Diff line number Diff line
@@ -36,6 +36,7 @@
struct sock;

struct inet_skb_parm {
	int			iif;
	struct ip_options	opt;		/* Compiled IP options		*/
	unsigned char		flags;

+42 −0
Original line number Diff line number Diff line
@@ -25,6 +25,8 @@

struct l3mdev_ops {
	u32		(*l3mdev_fib_table)(const struct net_device *dev);
	struct sk_buff * (*l3mdev_l3_rcv)(struct net_device *dev,
					  struct sk_buff *skb, u16 proto);

	/* IPv4 ops */
	struct rtable *	(*l3mdev_get_rtable)(const struct net_device *dev,
@@ -134,6 +136,34 @@ int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4);

struct dst_entry *l3mdev_get_rt6_dst(struct net *net, const struct flowi6 *fl6);

static inline
struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto)
{
	struct net_device *master = NULL;

	if (netif_is_l3_slave(skb->dev))
		master = netdev_master_upper_dev_get_rcu(skb->dev);
	else if (netif_is_l3_master(skb->dev))
		master = skb->dev;

	if (master && master->l3mdev_ops->l3mdev_l3_rcv)
		skb = master->l3mdev_ops->l3mdev_l3_rcv(master, skb, proto);

	return skb;
}

static inline
struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb)
{
	return l3mdev_l3_rcv(skb, AF_INET);
}

static inline
struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb)
{
	return l3mdev_l3_rcv(skb, AF_INET6);
}

#else

static inline int l3mdev_master_ifindex_rcu(const struct net_device *dev)
@@ -194,6 +224,18 @@ struct dst_entry *l3mdev_get_rt6_dst(struct net *net, const struct flowi6 *fl6)
{
	return NULL;
}

static inline
struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb)
{
	return skb;
}

static inline
struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb)
{
	return skb;
}
#endif

#endif /* _NET_L3MDEV_H_ */
Loading