Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8fee3156 authored by David S. Miller
Browse files

Merge branch 'vrf-tx-hook'



David Ahern says:

====================
net: Convert vrf to tx hook

The motivation for this series is that ICMP Unreachable - Fragmentation
Needed packets are not handled properly for VRFs. Specifically, the
FIB lookup in __ip_rt_update_pmtu fails so no nexthop exception is
created with the reduced MTU. As a result connections stall if packets
larger than the smallest MTU in the path are generated.

While investigating that problem I also noticed that the MSS for all
connections in a VRF is based on the VRF device's MTU and not the
route the packets ultimately go through. VRF currently uses a dst
to direct packets to the device. The first FIB lookup returns this dst
and then the lookup in the VRF driver gets the actual output route. A
side effect of this design is that the VRF dst is cached on sockets
and then used for calculations like the MSS.

This series fixes this problem by removing the hook in the FIB lookups
that returns the dst pointing to the VRF device and always doing the
actual FIB lookup instead. This allows the real dst to be used
throughout the stack (for example the MSS). Packets are diverted to
the VRF device on Tx using an l3mdev hook in the output path similar to
what is done for Rx. The end result is a simpler implementation for
VRF with fewer intrusions into the network stack and symmetrical packet
handling for Rx and Tx paths.

Comparison of netperf performance for a build without l3mdev (best case
performance), the old vrf driver and the VRF driver from this series.
Data are collected using VMs with virtio + vhost. The netperf client
runs in the VM and netserver runs in the host. 1-byte RR tests are done
as these packets exaggerate the performance hit due to the extra lookups
done for l3mdev and VRF.

Command: netperf -cC -H ${ip} -l 60 -t {TCP,UDP}_RR [-J red]

                      TCP_RR              UDP_RR
                   IPv4     IPv6       IPv4     IPv6
no l3mdev        29,996   30,601     31,638   24,336
vrf old          27,417   27,626     29,159   24,801
vrf new          28,036   28,372     30,110   24,857
l3mdev, no vrf   29,534   30,465     30,670   24,346

 * Transactions per second as reported by netperf
 * netperf modified to take a bind-to-device argument -- the -J red option

1. 'no l3mdev'      == NET_L3_MASTER_DEV is unset so code is compiled out
2. 'vrf old'        == data for existing implementation
3. 'vrf new'        == data with this series
4. 'l3mdev, no vrf' == NET_L3_MASTER_DEV is enabled but traffic is not
                       going through a VRF

About the series
- patch 1 adds the flow update (changing oif or iif to L3 master device
  and setting the flag to skip the oif check) to ipv4 and ipv6 paths just
  before hitting the rules. This catches all code paths in a single spot.

- patch 2 adds the Tx hook to push the packet to the l3mdev if relevant

- patch 3 adds some checks so the vrf device can act as a vrf-local
  loopback. These changes were not needed before since the vrf dst was
  returned from the lookup.

- patches 4 and 5 flip the ipv4 and ipv6 stacks to the tx hook leaving
  the route lookup to be the real one. The dst flip happens at the
  beginning of the L3 output path so the VRFs can have device based
  features such as netfilter, tc and tcpdump.

- patches 6-11 remove no longer needed l3mdev code

v2
- properly handle IPv6 link scope addresses

- keep the device xmit path and associated dst which is switched in by
  the l3_out hook. packets still need to go through the xmit path in
  case the user puts a qdisc on the vrf device and to allow tc rules.
  version 1 short circuited the tx handling and only covered netfilter
  and tcpdump.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents cf9932a9 c71ad3d4
Loading
Loading
Loading
Loading
+146 −145
Original line number Original line Diff line number Diff line
@@ -137,6 +137,20 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev,
}
}


#if IS_ENABLED(CONFIG_IPV6)
#if IS_ENABLED(CONFIG_IPV6)
/* Pass the packet through the IPv6 NF_INET_LOCAL_OUT netfilter hook, using
 * the device of the dst currently attached to the skb as the output device.
 * When the hook returns 1 the output path is continued here by calling
 * dst_output(); any other hook result is returned to the caller as is.
 */
static int vrf_ip6_local_out(struct net *net, struct sock *sk,
			     struct sk_buff *skb)
{
	struct net_device *out_dev = skb_dst(skb)->dev;
	int rc;

	rc = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
		     NULL, out_dev, dst_output);
	if (unlikely(rc != 1))
		return rc;

	return dst_output(net, sk, skb);
}

static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
					   struct net_device *dev)
					   struct net_device *dev)
{
{
@@ -151,7 +165,7 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
		.flowlabel = ip6_flowinfo(iph),
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_mark = skb->mark,
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
		.flowi6_proto = iph->nexthdr,
		.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF,
		.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF,
	};
	};
	int ret = NET_XMIT_DROP;
	int ret = NET_XMIT_DROP;
	struct dst_entry *dst;
	struct dst_entry *dst;
@@ -207,7 +221,7 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
	/* strip the ethernet header added for pass through VRF device */
	/* strip the ethernet header added for pass through VRF device */
	__skb_pull(skb, skb_network_offset(skb));
	__skb_pull(skb, skb_network_offset(skb));


	ret = ip6_local_out(net, skb->sk, skb);
	ret = vrf_ip6_local_out(net, skb->sk, skb);
	if (unlikely(net_xmit_eval(ret)))
	if (unlikely(net_xmit_eval(ret)))
		dev->stats.tx_errors++;
		dev->stats.tx_errors++;
	else
	else
@@ -227,6 +241,20 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
}
}
#endif
#endif


/* Modelled on ip_local_out, which can't be used here b/c the dst is
 * switched pointing to us. Runs the IPv4 NF_INET_LOCAL_OUT netfilter hook
 * with the device of the skb's current dst as the output device; when the
 * hook returns 1 the packet is handed to dst_output(), otherwise the hook
 * result is returned unchanged.
 */
static int vrf_ip_local_out(struct net *net, struct sock *sk,
			    struct sk_buff *skb)
{
	struct net_device *out_dev = skb_dst(skb)->dev;
	int rc;

	rc = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb,
		     NULL, out_dev, dst_output);
	if (unlikely(rc != 1))
		return rc;

	return dst_output(net, sk, skb);
}

static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
					   struct net_device *vrf_dev)
					   struct net_device *vrf_dev)
{
{
@@ -237,8 +265,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
		.flowi4_oif = vrf_dev->ifindex,
		.flowi4_oif = vrf_dev->ifindex,
		.flowi4_iif = LOOPBACK_IFINDEX,
		.flowi4_iif = LOOPBACK_IFINDEX,
		.flowi4_tos = RT_TOS(ip4h->tos),
		.flowi4_tos = RT_TOS(ip4h->tos),
		.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_L3MDEV_SRC |
		.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF,
				FLOWI_FLAG_SKIP_NH_OIF,
		.daddr = ip4h->daddr,
		.daddr = ip4h->daddr,
	};
	};
	struct net *net = dev_net(vrf_dev);
	struct net *net = dev_net(vrf_dev);
@@ -292,7 +319,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
					       RT_SCOPE_LINK);
					       RT_SCOPE_LINK);
	}
	}


	ret = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
	ret = vrf_ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
	if (unlikely(net_xmit_eval(ret)))
	if (unlikely(net_xmit_eval(ret)))
		vrf_dev->stats.tx_errors++;
		vrf_dev->stats.tx_errors++;
	else
	else
@@ -377,6 +404,43 @@ static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb)
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
}


/* set dst on skb to send packet to us via dev_xmit path. Allows
 * packet to go through device based features such as qdisc, netfilter
 * hooks and packet sockets with skb->dev set to vrf device.
 *
 * Returns the skb untouched when the IPv6 destination needs a strict
 * (link scope) lookup, the skb with its dst replaced by the VRF's cached
 * rt6 dst otherwise, or NULL when no rt6 is set on the VRF (the skb is
 * then passed to vrf_tx_error()). The sk argument is not used here.
 */
static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
				   struct sock *sk,
				   struct sk_buff *skb)
{
	struct net_vrf *vrf = netdev_priv(vrf_dev);
	struct dst_entry *dst = NULL;
	struct rt6_info *rt6;

	/* don't divert link scope packets */
	if (rt6_need_strict(&ipv6_hdr(skb)->daddr))
		return skb;

	/* vrf->rt6 is read under RCU; take the dst reference before
	 * leaving the read-side section so it can be used afterwards
	 */
	rcu_read_lock();

	rt6 = rcu_dereference(vrf->rt6);
	if (likely(rt6)) {
		dst = &rt6->dst;
		dst_hold(dst);
	}

	rcu_read_unlock();

	/* no cached dst for the VRF device: report a tx error.
	 * NOTE(review): vrf_tx_error() presumably consumes the skb since
	 * NULL is returned -- confirm against its definition
	 */
	if (unlikely(!dst)) {
		vrf_tx_error(vrf_dev, skb);
		return NULL;
	}

	/* swap the dst from the route lookup for the VRF device's dst;
	 * the reference taken above is handed over to the skb
	 */
	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	return skb;
}

/* holding rtnl */
/* holding rtnl */
static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
{
{
@@ -463,6 +527,13 @@ static int vrf_rt6_create(struct net_device *dev)
	return rc;
	return rc;
}
}
#else
#else
/* Stub variant of the IPv6 output hook: performs no dst switch and
 * returns the skb unchanged.
 * NOTE(review): sits in the #else branch, presumably of the
 * IS_ENABLED(CONFIG_IPV6) conditional -- confirm against the full file
 */
static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
				   struct sock *sk,
				   struct sk_buff *skb)
{
	return skb;
}

static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
{
{
}
}
@@ -531,6 +602,55 @@ static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
			    !(IPCB(skb)->flags & IPSKB_REROUTED));
			    !(IPCB(skb)->flags & IPSKB_REROUTED));
}
}


/* set dst on skb to send packet to us via dev_xmit path. Allows
 * packet to go through device based features such as qdisc, netfilter
 * hooks and packet sockets with skb->dev set to vrf device.
 *
 * Returns the skb with its dst replaced by the VRF's cached rtable dst,
 * or NULL when no rtable is set on the VRF (the skb is then passed to
 * vrf_tx_error()). The sk argument is not used here.
 */
static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
				  struct sock *sk,
				  struct sk_buff *skb)
{
	struct net_vrf *vrf = netdev_priv(vrf_dev);
	struct dst_entry *dst = NULL;
	struct rtable *rth;

	/* vrf->rth is read under RCU; take the dst reference before
	 * leaving the read-side section so it can be used afterwards
	 */
	rcu_read_lock();

	rth = rcu_dereference(vrf->rth);
	if (likely(rth)) {
		dst = &rth->dst;
		dst_hold(dst);
	}

	rcu_read_unlock();

	/* no cached dst for the VRF device: report a tx error.
	 * NOTE(review): vrf_tx_error() presumably consumes the skb since
	 * NULL is returned -- confirm against its definition
	 */
	if (unlikely(!dst)) {
		vrf_tx_error(vrf_dev, skb);
		return NULL;
	}

	/* swap the dst from the route lookup for the VRF device's dst;
	 * the reference taken above is handed over to the skb
	 */
	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	return skb;
}

/* L3 output hook for the VRF device: dispatch the skb to the helper for
 * its address family. AF_INET goes to vrf_ip_out(), AF_INET6 goes to
 * vrf_ip6_out(); for any other protocol the skb is returned untouched.
 *
 * called with rcu lock held
 */
static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev,
				  struct sock *sk,
				  struct sk_buff *skb,
				  u16 proto)
{
	if (proto == AF_INET)
		return vrf_ip_out(vrf_dev, sk, skb);

	if (proto == AF_INET6)
		return vrf_ip6_out(vrf_dev, sk, skb);

	return skb;
}

/* holding rtnl */
/* holding rtnl */
static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf)
static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf)
{
{
@@ -722,63 +842,6 @@ static u32 vrf_fib_table(const struct net_device *dev)
	return vrf->tb_id;
	return vrf->tb_id;
}
}


static struct rtable *vrf_get_rtable(const struct net_device *dev,
				     const struct flowi4 *fl4)
{
	struct rtable *rth = NULL;

	if (!(fl4->flowi4_flags & FLOWI_FLAG_L3MDEV_SRC)) {
		struct net_vrf *vrf = netdev_priv(dev);

		rcu_read_lock();

		rth = rcu_dereference(vrf->rth);
		if (likely(rth))
			dst_hold(&rth->dst);

		rcu_read_unlock();
	}

	return rth;
}

/* called under rcu_read_lock */
static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
{
	struct fib_result res = { .tclassid = 0 };
	struct net *net = dev_net(dev);
	u32 orig_tos = fl4->flowi4_tos;
	u8 flags = fl4->flowi4_flags;
	u8 scope = fl4->flowi4_scope;
	u8 tos = RT_FL_TOS(fl4);
	int rc;

	if (unlikely(!fl4->daddr))
		return 0;

	fl4->flowi4_flags |= FLOWI_FLAG_SKIP_NH_OIF;
	fl4->flowi4_iif = LOOPBACK_IFINDEX;
	/* make sure oif is set to VRF device for lookup */
	fl4->flowi4_oif = dev->ifindex;
	fl4->flowi4_tos = tos & IPTOS_RT_MASK;
	fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
			     RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);

	rc = fib_lookup(net, fl4, &res, 0);
	if (!rc) {
		if (res.type == RTN_LOCAL)
			fl4->saddr = res.fi->fib_prefsrc ? : fl4->daddr;
		else
			fib_select_path(net, &res, fl4, -1);
	}

	fl4->flowi4_flags = flags;
	fl4->flowi4_tos = orig_tos;
	fl4->flowi4_scope = scope;

	return rc;
}

static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
{
	return 0;
	return 0;
@@ -970,26 +1033,23 @@ static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev,
}
}


#if IS_ENABLED(CONFIG_IPV6)
#if IS_ENABLED(CONFIG_IPV6)
static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev,
/* send to link-local or multicast address via interface enslaved to
 * VRF device. Force lookup to VRF table without changing flow struct
 */
static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
					      struct flowi6 *fl6)
					      struct flowi6 *fl6)
{
{
	bool need_strict = rt6_need_strict(&fl6->daddr);
	struct net_vrf *vrf = netdev_priv(dev);
	struct net *net = dev_net(dev);
	struct net *net = dev_net(dev);
	int flags = RT6_LOOKUP_F_IFACE;
	struct dst_entry *dst = NULL;
	struct dst_entry *dst = NULL;
	struct rt6_info *rt;
	struct rt6_info *rt;


	/* send to link-local or multicast address */
	if (need_strict) {
		int flags = RT6_LOOKUP_F_IFACE;

	/* VRF device does not have a link-local address and
	/* VRF device does not have a link-local address and
	 * sending packets to link-local or mcast addresses over
	 * sending packets to link-local or mcast addresses over
	 * a VRF device does not make sense
	 * a VRF device does not make sense
	 */
	 */
	if (fl6->flowi6_oif == dev->ifindex) {
	if (fl6->flowi6_oif == dev->ifindex) {
			struct dst_entry *dst = &net->ipv6.ip6_null_entry->dst;
		dst = &net->ipv6.ip6_null_entry->dst;

		dst_hold(dst);
		dst_hold(dst);
		return dst;
		return dst;
	}
	}
@@ -1001,75 +1061,16 @@ static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev,
	if (rt)
	if (rt)
		dst = &rt->dst;
		dst = &rt->dst;


	} else if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) {

		rcu_read_lock();

		rt = rcu_dereference(vrf->rt6);
		if (likely(rt)) {
			dst = &rt->dst;
			dst_hold(dst);
		}

		rcu_read_unlock();
	}

	/* make sure oif is set to VRF device for lookup */
	if (!need_strict)
		fl6->flowi6_oif = dev->ifindex;

	return dst;
	return dst;
}
}

/* called under rcu_read_lock */
static int vrf_get_saddr6(struct net_device *dev, const struct sock *sk,
			  struct flowi6 *fl6)
{
	struct net *net = dev_net(dev);
	struct dst_entry *dst;
	struct rt6_info *rt;
	int err;

	if (rt6_need_strict(&fl6->daddr)) {
		rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif,
					  RT6_LOOKUP_F_IFACE);
		if (unlikely(!rt))
			return 0;

		dst = &rt->dst;
	} else {
		__u8 flags = fl6->flowi6_flags;

		fl6->flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
		fl6->flowi6_flags |= FLOWI_FLAG_SKIP_NH_OIF;

		dst = ip6_route_output(net, sk, fl6);
		rt = (struct rt6_info *)dst;

		fl6->flowi6_flags = flags;
	}

	err = dst->error;
	if (!err) {
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
	}

	dst_release(dst);

	return err;
}
#endif
#endif


static const struct l3mdev_ops vrf_l3mdev_ops = {
static const struct l3mdev_ops vrf_l3mdev_ops = {
	.l3mdev_fib_table	= vrf_fib_table,
	.l3mdev_fib_table	= vrf_fib_table,
	.l3mdev_get_rtable	= vrf_get_rtable,
	.l3mdev_get_saddr	= vrf_get_saddr,
	.l3mdev_l3_rcv		= vrf_l3_rcv,
	.l3mdev_l3_rcv		= vrf_l3_rcv,
	.l3mdev_l3_out		= vrf_l3_out,
#if IS_ENABLED(CONFIG_IPV6)
#if IS_ENABLED(CONFIG_IPV6)
	.l3mdev_get_rt6_dst	= vrf_get_rt6_dst,
	.l3mdev_link_scope_lookup = vrf_link_scope_lookup,
	.l3mdev_get_saddr6	= vrf_get_saddr6,
#endif
#endif
};
};


+1 −2
Original line number Original line Diff line number Diff line
@@ -34,8 +34,7 @@ struct flowi_common {
	__u8	flowic_flags;
	__u8	flowic_flags;
#define FLOWI_FLAG_ANYSRC		0x01
#define FLOWI_FLAG_ANYSRC		0x01
#define FLOWI_FLAG_KNOWN_NH		0x02
#define FLOWI_FLAG_KNOWN_NH		0x02
#define FLOWI_FLAG_L3MDEV_SRC		0x04
#define FLOWI_FLAG_SKIP_NH_OIF		0x04
#define FLOWI_FLAG_SKIP_NH_OIF		0x08
	__u32	flowic_secid;
	__u32	flowic_secid;
	struct flowi_tunnel flowic_tun_key;
	struct flowi_tunnel flowic_tun_key;
};
};
+56 −75
Original line number Original line Diff line number Diff line
@@ -11,6 +11,7 @@
#ifndef _NET_L3MDEV_H_
#ifndef _NET_L3MDEV_H_
#define _NET_L3MDEV_H_
#define _NET_L3MDEV_H_


#include <net/dst.h>
#include <net/fib_rules.h>
#include <net/fib_rules.h>


/**
/**
@@ -18,29 +19,23 @@
 *
 *
 * @l3mdev_fib_table: Get FIB table id to use for lookups
 * @l3mdev_fib_table: Get FIB table id to use for lookups
 *
 *
 * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device
 * @l3mdev_l3_rcv:    Hook in L3 receive path
 *
 *
 * @l3mdev_get_saddr: Get source address for a flow
 * @l3mdev_l3_out:    Hook in L3 output path
 *
 *
 * @l3mdev_get_rt6_dst: Get cached IPv6 rt6_info (dst_entry) for device
 * @l3mdev_link_scope_lookup: IPv6 lookup for linklocal and mcast destinations
 */
 */


struct l3mdev_ops {
struct l3mdev_ops {
	u32		(*l3mdev_fib_table)(const struct net_device *dev);
	u32		(*l3mdev_fib_table)(const struct net_device *dev);
	struct sk_buff * (*l3mdev_l3_rcv)(struct net_device *dev,
	struct sk_buff * (*l3mdev_l3_rcv)(struct net_device *dev,
					  struct sk_buff *skb, u16 proto);
					  struct sk_buff *skb, u16 proto);

	struct sk_buff * (*l3mdev_l3_out)(struct net_device *dev,
	/* IPv4 ops */
					  struct sock *sk, struct sk_buff *skb,
	struct rtable *	(*l3mdev_get_rtable)(const struct net_device *dev,
					  u16 proto);
					     const struct flowi4 *fl4);
	int		(*l3mdev_get_saddr)(struct net_device *dev,
					    struct flowi4 *fl4);


	/* IPv6 ops */
	/* IPv6 ops */
	struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev,
	struct dst_entry * (*l3mdev_link_scope_lookup)(const struct net_device *dev,
						 struct flowi6 *fl6);
	int		   (*l3mdev_get_saddr6)(struct net_device *dev,
						const struct sock *sk,
						 struct flowi6 *fl6);
						 struct flowi6 *fl6);
};
};


@@ -49,6 +44,8 @@ struct l3mdev_ops {
int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
			  struct fib_lookup_arg *arg);
			  struct fib_lookup_arg *arg);


void l3mdev_update_flow(struct net *net, struct flowi *fl);

int l3mdev_master_ifindex_rcu(const struct net_device *dev);
int l3mdev_master_ifindex_rcu(const struct net_device *dev);
static inline int l3mdev_master_ifindex(struct net_device *dev)
static inline int l3mdev_master_ifindex(struct net_device *dev)
{
{
@@ -80,7 +77,7 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex)
}
}


static inline
static inline
const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev)
struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev)
{
{
	/* netdev_master_upper_dev_get_rcu calls
	/* netdev_master_upper_dev_get_rcu calls
	 * list_first_or_null_rcu to walk the upper dev list.
	 * list_first_or_null_rcu to walk the upper dev list.
@@ -89,7 +86,7 @@ const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev)
	 * typecast to remove the const
	 * typecast to remove the const
	 */
	 */
	struct net_device *dev = (struct net_device *)_dev;
	struct net_device *dev = (struct net_device *)_dev;
	const struct net_device *master;
	struct net_device *master;


	if (!dev)
	if (!dev)
		return NULL;
		return NULL;
@@ -104,26 +101,6 @@ const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev)
	return master;
	return master;
}
}


/* get index of an interface to use for FIB lookups. For devices
 * enslaved to an L3 master device FIB lookups are based on the
 * master index
 */
static inline int l3mdev_fib_oif_rcu(struct net_device *dev)
{
	return l3mdev_master_ifindex_rcu(dev) ? : dev->ifindex;
}

static inline int l3mdev_fib_oif(struct net_device *dev)
{
	int oif;

	rcu_read_lock();
	oif = l3mdev_fib_oif_rcu(dev);
	rcu_read_unlock();

	return oif;
}

u32 l3mdev_fib_table_rcu(const struct net_device *dev);
u32 l3mdev_fib_table_rcu(const struct net_device *dev);
u32 l3mdev_fib_table_by_index(struct net *net, int ifindex);
u32 l3mdev_fib_table_by_index(struct net *net, int ifindex);
static inline u32 l3mdev_fib_table(const struct net_device *dev)
static inline u32 l3mdev_fib_table(const struct net_device *dev)
@@ -137,15 +114,6 @@ static inline u32 l3mdev_fib_table(const struct net_device *dev)
	return tb_id;
	return tb_id;
}
}


static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev,
					       const struct flowi4 *fl4)
{
	if (netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_get_rtable)
		return dev->l3mdev_ops->l3mdev_get_rtable(dev, fl4);

	return NULL;
}

static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
{
{
	struct net_device *dev;
	struct net_device *dev;
@@ -165,11 +133,7 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
	return rc;
	return rc;
}
}


int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4);
struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6);

struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6);
int l3mdev_get_saddr6(struct net *net, const struct sock *sk,
		      struct flowi6 *fl6);


static inline
static inline
struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto)
struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto)
@@ -199,6 +163,34 @@ struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb)
	return l3mdev_l3_rcv(skb, AF_INET6);
	return l3mdev_l3_rcv(skb, AF_INET6);
}
}


/* Hook for the L3 output path. When the device of the skb's dst is an L3
 * slave whose master provides an l3mdev_l3_out op, the skb is handed to
 * that op and whatever it returns is passed back to the caller. In every
 * other case the skb is returned unchanged.
 *
 * NOTE(review): the master is looked up with the _rcu helper, so callers
 * are presumably inside an RCU read-side section -- confirm at call sites
 */
static inline
struct sk_buff *l3mdev_l3_out(struct sock *sk, struct sk_buff *skb, u16 proto)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct net_device *master;

	if (!netif_is_l3_slave(dev))
		return skb;

	master = netdev_master_upper_dev_get_rcu(dev);
	if (!master || !master->l3mdev_ops->l3mdev_l3_out)
		return skb;

	return master->l3mdev_ops->l3mdev_l3_out(master, sk, skb, proto);
}

/* IPv4 entry point for the L3 output hook: calls l3mdev_l3_out() with
 * proto set to AF_INET and returns its result.
 */
static inline
struct sk_buff *l3mdev_ip_out(struct sock *sk, struct sk_buff *skb)
{
	return l3mdev_l3_out(sk, skb, AF_INET);
}

/* IPv6 entry point for the L3 output hook: calls l3mdev_l3_out() with
 * proto set to AF_INET6 and returns its result.
 */
static inline
struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb)
{
	return l3mdev_l3_out(sk, skb, AF_INET6);
}
#else
#else


static inline int l3mdev_master_ifindex_rcu(const struct net_device *dev)
static inline int l3mdev_master_ifindex_rcu(const struct net_device *dev)
@@ -216,20 +208,11 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex)
}
}


static inline
static inline
const struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev)
struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev)
{
{
	return NULL;
	return NULL;
}
}


static inline int l3mdev_fib_oif_rcu(struct net_device *dev)
{
	return dev ? dev->ifindex : 0;
}
static inline int l3mdev_fib_oif(struct net_device *dev)
{
	return dev ? dev->ifindex : 0;
}

static inline u32 l3mdev_fib_table_rcu(const struct net_device *dev)
static inline u32 l3mdev_fib_table_rcu(const struct net_device *dev)
{
{
	return 0;
	return 0;
@@ -243,43 +226,37 @@ static inline u32 l3mdev_fib_table_by_index(struct net *net, int ifindex)
	return 0;
	return 0;
}
}


static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev,
					       const struct flowi4 *fl4)
{
	return NULL;
}

static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
{
{
	return false;
	return false;
}
}


static inline int l3mdev_get_saddr(struct net *net, int ifindex,
static inline
				   struct flowi4 *fl4)
struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6)
{
{
	return 0;
	return NULL;
}
}


static inline
static inline
struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6)
struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb)
{
{
	return NULL;
	return skb;
}
}


static inline int l3mdev_get_saddr6(struct net *net, const struct sock *sk,
static inline
				    struct flowi6 *fl6)
struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb)
{
{
	return 0;
	return skb;
}
}


static inline
static inline
struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb)
struct sk_buff *l3mdev_ip_out(struct sock *sk, struct sk_buff *skb)
{
{
	return skb;
	return skb;
}
}


static inline
static inline
struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb)
struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb)
{
{
	return skb;
	return skb;
}
}
@@ -290,6 +267,10 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
{
{
	return 1;
	return 1;
}
}
static inline
void l3mdev_update_flow(struct net *net, struct flowi *fl)
{
}
#endif
#endif


#endif /* _NET_L3MDEV_H_ */
#endif /* _NET_L3MDEV_H_ */
+0 −10
Original line number Original line Diff line number Diff line
@@ -29,7 +29,6 @@
#include <net/flow.h>
#include <net/flow.h>
#include <net/inet_sock.h>
#include <net/inet_sock.h>
#include <net/ip_fib.h>
#include <net/ip_fib.h>
#include <net/l3mdev.h>
#include <linux/in_route.h>
#include <linux/in_route.h>
#include <linux/rtnetlink.h>
#include <linux/rtnetlink.h>
#include <linux/rcupdate.h>
#include <linux/rcupdate.h>
@@ -285,15 +284,6 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4,
	ip_route_connect_init(fl4, dst, src, tos, oif, protocol,
	ip_route_connect_init(fl4, dst, src, tos, oif, protocol,
			      sport, dport, sk);
			      sport, dport, sk);


	if (!src && oif) {
		int rc;

		rc = l3mdev_get_saddr(net, oif, fl4);
		if (rc < 0)
			return ERR_PTR(rc);

		src = fl4->saddr;
	}
	if (!dst || !src) {
	if (!dst || !src) {
		rt = __ip_route_output_key(net, fl4);
		rt = __ip_route_output_key(net, fl4);
		if (IS_ERR(rt))
		if (IS_ERR(rt))
+3 −0
Original line number Original line Diff line number Diff line
@@ -56,6 +56,9 @@ int __fib_lookup(struct net *net, struct flowi4 *flp,
	};
	};
	int err;
	int err;


	/* update flow if oif or iif point to device enslaved to l3mdev */
	l3mdev_update_flow(net, flowi4_to_flowi(flp));

	err = fib_rules_lookup(net->ipv4.rules_ops, flowi4_to_flowi(flp), 0, &arg);
	err = fib_rules_lookup(net->ipv4.rules_ops, flowi4_to_flowi(flp), 0, &arg);
#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_ROUTE_CLASSID
	if (arg.rule)
	if (arg.rule)
Loading