Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 79774d6b authored by David S. Miller
Browse files

Merge branch 'fix-bpf_redirect'



Martin KaFai Lau says:

====================
bpf: Fix bpf_redirect to an ipip/ip6tnl dev

This patch set fixes a bug in bpf_redirect(dev, flags) when dev is an
ipip/ip6tnl device.  The current problem is that IP-EthHdr-IP is sent out
instead of IP-IP.

Patch 1 adds a dev->type test similar to dev_is_mac_header_xmit()
in act_mirred.c, which is only available in net-next.  We can consider
refactoring it once this patch is pulled into net-next from net.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 23dd8315 90e02896
Loading
Loading
Loading
Loading
+15 −0
Original line number Diff line number Diff line
@@ -3354,6 +3354,21 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
bool is_skb_forwardable(const struct net_device *dev,
			const struct sk_buff *skb);

/*
 * Checks and cleanup performed on an skb before it is handed to @dev.
 *
 * The skb is dropped when skb_orphan_frags() reports a failure or when
 * is_skb_forwardable() rejects it for @dev.  In that case dev->rx_dropped
 * is incremented, the skb is freed and NET_RX_DROP is returned, so the
 * caller must not touch the skb again.
 *
 * Otherwise the skb is scrubbed, its priority is reset to 0 and 0 is
 * returned.
 */
static __always_inline int ____dev_forward_skb(struct net_device *dev,
					       struct sk_buff *skb)
{
	if (skb_orphan_frags(skb, GFP_ATOMIC))
		goto drop;

	if (unlikely(!is_skb_forwardable(dev, skb)))
		goto drop;

	skb_scrub_packet(skb, true);
	skb->priority = 0;
	return 0;

drop:
	atomic_long_inc(&dev->rx_dropped);
	kfree_skb(skb);
	return NET_RX_DROP;
}

void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);

extern int		netdev_budget;
+6 −11
Original line number Diff line number Diff line
@@ -1766,19 +1766,14 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable);

/*
 * Prepare @skb for reception on @dev.
 *
 * On success of ____dev_forward_skb() the protocol is derived with
 * eth_type_trans() and the receive checksum is adjusted for the ETH_HLEN
 * bytes at eth_hdr(skb).
 *
 * NOTE(review): this listing appears to come from a rendered diff whose
 * +/- markers were lost.  The older inline body (orphan/forwardable
 * check, scrub, priority reset, "return 0") seems to be interleaved with
 * the newer body (____dev_forward_skb() call, eth_type_trans() step,
 * "return ret").  Confirm against the real tree before reading it as a
 * single function body.
 */
int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
{
	if (skb_orphan_frags(skb, GFP_ATOMIC) ||
	    unlikely(!is_skb_forwardable(dev, skb))) {
		atomic_long_inc(&dev->rx_dropped);
		kfree_skb(skb);
		return NET_RX_DROP;
	}
	int ret = ____dev_forward_skb(dev, skb);

	skb_scrub_packet(skb, true);
	skb->priority = 0;
	/* Only touch the skb if the common forwarding step did not drop it. */
	if (likely(!ret)) {
		skb->protocol = eth_type_trans(skb, dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	}

	return 0;
	return ret;
}
EXPORT_SYMBOL_GPL(__dev_forward_skb);

+60 −8
Original line number Diff line number Diff line
@@ -1628,6 +1628,19 @@ static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
	return dev_forward_skb(dev, skb);
}

/*
 * Inject @skb into the receive path of @dev without any MAC header
 * processing.
 *
 * Runs the common ____dev_forward_skb() step first; if that reports a
 * non-zero result it is returned unchanged.  Otherwise skb->dev is
 * pointed at @dev and the result of netif_rx() is returned.
 */
static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
				      struct sk_buff *skb)
{
	int err = ____dev_forward_skb(dev, skb);

	if (unlikely(err))
		return err;

	skb->dev = dev;
	return netif_rx(skb);
}

static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
{
	int ret;
@@ -1647,6 +1660,51 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
	return ret;
}

/*
 * Redirect @skb to @dev after stripping the MAC header.
 *
 * The bytes between the MAC header and the network header are pulled,
 * the MAC header is popped and mac_len is reset.  The skb is then passed
 * to __bpf_rx_skb_no_mac() when BPF_F_INGRESS is set in @flags, or to
 * __bpf_tx_skb() otherwise.
 */
static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
				 u32 flags)
{
	/* Computed by hand: skb->mac_len is not set on normal egress. */
	unsigned int mac_hdr_len = skb->network_header - skb->mac_header;

	__skb_pull(skb, mac_hdr_len);

	/* At ingress the MAC header has already been pulled once.
	 * At egress skb_postpull_rcsum() is needed in case the skb
	 * originated from ingress (i.e. a forwarded skb), so that
	 * rcsum starts at the network header.
	 */
	if (!skb_at_tc_ingress(skb))
		skb_postpull_rcsum(skb, skb_mac_header(skb), mac_hdr_len);
	skb_pop_mac_header(skb);
	skb_reset_mac_len(skb);

	if (flags & BPF_F_INGRESS)
		return __bpf_rx_skb_no_mac(dev, skb);

	return __bpf_tx_skb(dev, skb);
}

/*
 * Redirect @skb to @dev with its MAC header kept.
 *
 * bpf_push_mac_rcsum() is applied first; the skb then goes to
 * __bpf_rx_skb() when BPF_F_INGRESS is set in @flags, or to
 * __bpf_tx_skb() otherwise.
 */
static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
				 u32 flags)
{
	bpf_push_mac_rcsum(skb);

	if (flags & BPF_F_INGRESS)
		return __bpf_rx_skb(dev, skb);

	return __bpf_tx_skb(dev, skb);
}

/*
 * Pick the redirect path for @skb according to dev->type.
 *
 * The tunnel/void/none device types listed below are sent through
 * __bpf_redirect_no_mac(); every other type goes through
 * __bpf_redirect_common().
 */
static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
			  u32 flags)
{
	bool keep_mac_header;

	switch (dev->type) {
	case ARPHRD_TUNNEL:
	case ARPHRD_TUNNEL6:
	case ARPHRD_SIT:
	case ARPHRD_IPGRE:
	case ARPHRD_VOID:
	case ARPHRD_NONE:
		keep_mac_header = false;
		break;
	default:
		keep_mac_header = true;
		break;
	}

	if (keep_mac_header)
		return __bpf_redirect_common(skb, dev, flags);

	return __bpf_redirect_no_mac(skb, dev, flags);
}

BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
{
	struct net_device *dev;
@@ -1675,10 +1733,7 @@ BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
		return -ENOMEM;
	}

	bpf_push_mac_rcsum(clone);

	return flags & BPF_F_INGRESS ?
	       __bpf_rx_skb(dev, clone) : __bpf_tx_skb(dev, clone);
	return __bpf_redirect(clone, dev, flags);
}

static const struct bpf_func_proto bpf_clone_redirect_proto = {
@@ -1722,10 +1777,7 @@ int skb_do_redirect(struct sk_buff *skb)
		return -EINVAL;
	}

	bpf_push_mac_rcsum(skb);

	return ri->flags & BPF_F_INGRESS ?
	       __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
	return __bpf_redirect(skb, dev, ri->flags);
}

static const struct bpf_func_proto bpf_redirect_proto = {
+4 −0
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@ hostprogs-y += xdp2
hostprogs-y += test_current_task_under_cgroup
hostprogs-y += trace_event
hostprogs-y += sampleip
hostprogs-y += tc_l2_redirect

test_verifier-objs := test_verifier.o libbpf.o
test_maps-objs := test_maps.o libbpf.o
@@ -56,6 +57,7 @@ test_current_task_under_cgroup-objs := bpf_load.o libbpf.o \
				       test_current_task_under_cgroup_user.o
trace_event-objs := bpf_load.o libbpf.o trace_event_user.o
sampleip-objs := bpf_load.o libbpf.o sampleip_user.o
tc_l2_redirect-objs := bpf_load.o libbpf.o tc_l2_redirect_user.o

# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -72,6 +74,7 @@ always += test_probe_write_user_kern.o
always += trace_output_kern.o
always += tcbpf1_kern.o
always += tcbpf2_kern.o
always += tc_l2_redirect_kern.o
always += lathist_kern.o
always += offwaketime_kern.o
always += spintest_kern.o
@@ -111,6 +114,7 @@ HOSTLOADLIBES_xdp2 += -lelf
HOSTLOADLIBES_test_current_task_under_cgroup += -lelf
HOSTLOADLIBES_trace_event += -lelf
HOSTLOADLIBES_sampleip += -lelf
HOSTLOADLIBES_tc_l2_redirect += -lelf

# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
#  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
+173 −0
Original line number Diff line number Diff line
#!/bin/bash

# Tool names can be overridden from the environment; an unset or empty
# value falls back to the plain command name.
TC=${TC:-tc}
IP=${IP:-ip}

# User-space helper and BPF object, both looked up in the current directory.
REDIRECT_USER='./tc_l2_redirect'
REDIRECT_BPF='./tc_l2_redirect_kern.o'

# Remember the host-wide sysctl values that the tests overwrite so that
# cleanup can write them back.
RP_FILTER=$(< /proc/sys/net/ipv4/conf/all/rp_filter)
IPV6_FORWARDING=$(< /proc/sys/net/ipv6/conf/all/forwarding)

# Build the topology shared by all tests.
#
# Creates namespaces ns1 and ns2, the veth pairs ve1/vens1 and ve2/vens2
# (the vens* ends are moved into ns1/ns2), addresses and routes on both
# sides, the ipt2/ip6t2 tunnel devices inside ns2, and the clsact qdiscs
# with the BPF filters on vens2 and ve2.
#
# Arguments:
#   $1  tunnel type. "ipip" routes 10.1.1.0/24 in ns2 through ipt2; any
#       other value routes 10.1.1.0/24 and 2401:db01::/64 through ip6t2.
#
# Side effects on the host: sets net.ipv4.conf.all.rp_filter=0 and
# net.ipv6.conf.all.forwarding=1.
function config_common {
	local tun_type=$1

	# Namespaces and veth pairs; one end of each pair stays on the host.
	$IP netns add ns1
	$IP netns add ns2
	$IP link add ve1 type veth peer name vens1
	$IP link add ve2 type veth peer name vens2
	$IP link set dev ve1 up
	$IP link set dev ve2 up
	$IP link set dev ve1 mtu 1500
	$IP link set dev ve2 mtu 1500
	$IP link set dev vens1 netns ns1
	$IP link set dev vens2 netns ns2

	# ns1: addresses on vens1 and default routes towards the host side.
	$IP -n ns1 link set dev lo up
	$IP -n ns1 link set dev vens1 up
	$IP -n ns1 addr add 10.1.1.101/24 dev vens1
	$IP -n ns1 addr add 2401:db01::65/64 dev vens1 nodad
	$IP -n ns1 route add default via 10.1.1.1 dev vens1
	$IP -n ns1 route add default via 2401:db01::1 dev vens1

	# ns2: addresses on vens2, extra addresses on lo (the ping targets
	# used by the tests), and the ipip/ip6tnl devices.
	$IP -n ns2 link set dev lo up
	$IP -n ns2 link set dev vens2 up
	$IP -n ns2 addr add 10.2.1.102/24 dev vens2
	$IP -n ns2 addr add 2401:db02::66/64 dev vens2 nodad
	$IP -n ns2 addr add 10.10.1.102 dev lo
	$IP -n ns2 addr add 2401:face::66/64 dev lo nodad
	$IP -n ns2 link add ipt2 type ipip local 10.2.1.102 remote 10.2.1.1
	$IP -n ns2 link add ip6t2 type ip6tnl mode any local 2401:db02::66 remote 2401:db02::1
	$IP -n ns2 link set dev ipt2 up
	$IP -n ns2 link set dev ip6t2 up
	$IP netns exec ns2 $TC qdisc add dev vens2 clsact
	$IP netns exec ns2 $TC filter add dev vens2 ingress bpf da obj $REDIRECT_BPF sec drop_non_tun_vip
	# Route traffic for ns1's subnets through the tunnel under test and
	# turn off reverse-path filtering on it.
	if [[ $tun_type == "ipip" ]]; then
		$IP -n ns2 route add 10.1.1.0/24 dev ipt2
		$IP netns exec ns2 sysctl -q -w net.ipv4.conf.all.rp_filter=0
		$IP netns exec ns2 sysctl -q -w net.ipv4.conf.ipt2.rp_filter=0
	else
		$IP -n ns2 route add 10.1.1.0/24 dev ip6t2
		$IP -n ns2 route add 2401:db01::/64 dev ip6t2
		$IP netns exec ns2 sysctl -q -w net.ipv4.conf.all.rp_filter=0
		$IP netns exec ns2 sysctl -q -w net.ipv4.conf.ip6t2.rp_filter=0
	fi

	# Host side addresses of the veth pairs.
	$IP addr add 10.1.1.1/24 dev ve1
	$IP addr add 2401:db01::1/64 dev ve1 nodad
	$IP addr add 10.2.1.1/24 dev ve2
	$IP addr add 2401:db02::1/64 dev ve2 nodad

	$TC qdisc add dev ve2 clsact
	$TC filter add dev ve2 ingress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_forward

	# Host-wide settings; cleanup writes the saved values back.
	sysctl -q -w net.ipv4.conf.all.rp_filter=0
	sysctl -q -w net.ipv6.conf.all.forwarding=1
}

# Tear down everything the tests may have created and restore the saved
# host sysctls.
#
# Runs with errexit off (and xtrace off when DEBUG is non-empty) so that
# objects which do not exist are ignored.  On return errexit is always
# enabled, and xtrace is enabled when DEBUG is non-empty.
function cleanup {
	local ns link

	set +e
	[[ -z $DEBUG ]] || set +x

	for ns in ns1 ns2; do
		$IP netns delete $ns > /dev/null 2>&1
	done

	for link in ve1 ve2 ipt ip6t; do
		$IP link del $link > /dev/null 2>&1
	done

	sysctl -q -w net.ipv4.conf.all.rp_filter=$RP_FILTER
	sysctl -q -w net.ipv6.conf.all.forwarding=$IPV6_FORWARDING
	rm -f /sys/fs/bpf/tc/globals/tun_iface

	[[ -z $DEBUG ]] || set -x
	set -e
}

# Test redirecting from an L2 device to an ipip tunnel device.
#
# Arguments:
#   $1  direction. "egress" attaches the redirect program to ve2 egress;
#       any other value attaches it to ve1 ingress.
#
# Prints "l2_to_ipip <direction>: " and then "OK" once all pings and the
# cleanup have run.
function l2_to_ipip {
	local dir=$1

	printf '%s' "l2_to_ipip $dir: "

	config_common ipip

	$IP link add ipt type ipip external
	$IP link set dev ipt up
	sysctl -q -w net.ipv4.conf.ipt.rp_filter=0
	sysctl -q -w net.ipv4.conf.ipt.forwarding=1

	case $dir in
	egress)
		$IP route add 10.10.1.0/24 via 10.2.1.102 dev ve2
		$TC filter add dev ve2 egress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_redirect
		sysctl -q -w net.ipv4.conf.ve1.forwarding=1
		;;
	*)
		$TC qdisc add dev ve1 clsact
		$TC filter add dev ve1 ingress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_redirect
		;;
	esac

	# Hand the ifindex of the tunnel device to the pinned tun_iface map.
	$REDIRECT_USER -U /sys/fs/bpf/tc/globals/tun_iface -i $(< /sys/class/net/ipt/ifindex)

	$IP netns exec ns1 ping -c1 10.10.1.102 > /dev/null 2>&1

	if [[ $dir == "egress" ]]; then
		# Also test direct egress to ve2 (i.e. traffic that is not
		# forwarded from ve1 to ve2).
		ping -c1 10.10.1.102 > /dev/null 2>&1
	fi

	cleanup

	echo "OK"
}

# Test redirecting from an L2 device to an ip6tnl tunnel device, for
# both IPv4 and IPv6 traffic.
#
# Arguments:
#   $1  direction. "egress" attaches the redirect program to ve2 egress;
#       any other value attaches it to ve1 ingress.
#
# Prints "l2_to_ip6tnl <direction>: " and then "OK" once all pings and
# the cleanup have run.
function l2_to_ip6tnl {
	local dir=$1

	printf '%s' "l2_to_ip6tnl $dir: "

	config_common ip6tnl

	$IP link add ip6t type ip6tnl mode any external
	$IP link set dev ip6t up
	sysctl -q -w net.ipv4.conf.ip6t.rp_filter=0
	sysctl -q -w net.ipv4.conf.ip6t.forwarding=1

	case $dir in
	egress)
		$IP route add 10.10.1.0/24 via 10.2.1.102 dev ve2
		$IP route add 2401:face::/64 via 2401:db02::66 dev ve2
		$TC filter add dev ve2 egress bpf da obj $REDIRECT_BPF sec l2_to_ip6tun_ingress_redirect
		sysctl -q -w net.ipv4.conf.ve1.forwarding=1
		;;
	*)
		$TC qdisc add dev ve1 clsact
		$TC filter add dev ve1 ingress bpf da obj $REDIRECT_BPF sec l2_to_ip6tun_ingress_redirect
		;;
	esac

	# Hand the ifindex of the tunnel device to the pinned tun_iface map.
	$REDIRECT_USER -U /sys/fs/bpf/tc/globals/tun_iface -i $(< /sys/class/net/ip6t/ifindex)

	$IP netns exec ns1 ping -c1 10.10.1.102 > /dev/null 2>&1
	$IP netns exec ns1 ping -6 -c1 2401:face::66 > /dev/null 2>&1

	if [[ $dir == "egress" ]]; then
		# Also test direct egress to ve2 (i.e. traffic that is not
		# forwarded from ve1 to ve2).
		ping -c1 10.10.1.102 > /dev/null 2>&1
		ping -6 -c1 2401:face::66 > /dev/null 2>&1
	fi

	cleanup

	echo "OK"
}

# Start from a clean slate; this first cleanup call also leaves errexit
# enabled for the rest of the script.
cleanup

# Optional arguments:
#   $1  space-separated list of test functions to run
#   $2  space-separated list of directions to run them in
# An argument that is not supplied falls back to the full set.
test_names=${1-"l2_to_ipip l2_to_ip6tnl"}
test_dirs=${2-"ingress egress"}

# Run every selected test in every selected direction.
for test_fn in $test_names; do
	for direction in $test_dirs; do
		$test_fn $direction
	done
done
Loading