Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 29ba6e74 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'replace-dst_confirm'



Julian Anastasov says:

====================
net: dst_confirm replacement

	This patchset addresses the problem of neighbour
confirmation where received replies from one nexthop
can cause confirmation of different nexthop when using
the same dst. Thanks to YueHaibing <yuehaibing@huawei.com>
for tracking the dst->pending_confirm problem.

	Sockets can obtain cached output route. Such
routes can be to known nexthop (rt_gateway=IP) or to be
used simultaneously for different nexthop IPs by different
subnet prefixes (nh->nh_scope = RT_SCOPE_HOST, rt_gateway=0).

	At first look, there are more problems:

- dst_confirm() sets flag on dst and not on dst->path,
as result, indication is lost when XFRM is used

- DNAT can change the nexthop, so the really used nexthop is
not confirmed

	So, the following solution is to avoid using
dst->pending_confirm.

	The current dst_confirm() usage is as follows:

Protocols confirming dst on received packets:
- TCP (1 dst per socket)
- SCTP (1 dst per transport)
- CXGB*

Protocols supporting sendmsg with MSG_CONFIRM [ | MSG_PROBE ] to
confirm neighbour:
- UDP IPv4/IPv6
- ICMPv4 PING
- RAW IPv4/IPv6
- L2TP/IPv6

MSG_CONFIRM for other purposes (fix not needed):
- CAN

Sending without locking the socket:
- UDP (when no cork)
- RAW (when hdrincl=1)

Redirects from old to new GW:
- rt6_do_redirect

	The patchset includes the following changes:

1. sock: add sk_dst_pending_confirm flag

- used only by TCP with patch 4 to remember the received
indication in sk->sk_dst_pending_confirm

2. net: add dst_pending_confirm flag to skbuff

- skb->dst_pending_confirm will be used by all protocols
in following patches, via skb_{set,get}_dst_pending_confirm

3. sctp: add dst_pending_confirm flag

- SCTP uses per-transport dsts and can not use
sk->sk_dst_pending_confirm like TCP

4. tcp: replace dst_confirm with sk_dst_confirm

5. net: add confirm_neigh method to dst_ops

- IPv4 and IPv6 provision for slow neigh lookups for MSG_PROBE users.
I decided to use neigh lookup only for this case because on
MSG_PROBE the skb may pass MTU checks but it does not reach
the neigh confirmation code. This patch will be used from patch 6.

- xfrm_confirm_neigh: we use the last tunnel address, if present.
When there are only transports, the original dest address is used.

6. net: use dst_confirm_neigh for UDP, RAW, ICMP, L2TP

- dst_confirm conversion for UDP, RAW, ICMP and L2TP/IPv6

- these protocols use MSG_CONFIRM propagated by ip*_append_data
to skb->dst_pending_confirm. sk->sk_dst_pending_confirm is not
used because some sending paths do not lock the socket. For
MSG_PROBE we use the slow lookup (dst_confirm_neigh).

- there are also 2 cases that need the slow lookup:
__ip6_rt_update_pmtu and rt6_do_redirect. I hope
&ipv6_hdr(skb)->saddr is the correct nexthop address to use here.

7. net: pending_confirm is not used anymore

- I failed to understand the CXGB* code, I see dst_confirm()
calls but I'm not sure dst_neigh_output() was called. For now
I just removed the dst->pending_confirm flag and left all
dst_confirm() calls there. Any better idea?

- Now may be old function neigh_output() should be restored
instead of dst_neigh_output?
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents b08d46b0 51ce8bd4
Loading
Loading
Loading
Loading
+4 −1
Original line number Diff line number Diff line
@@ -378,6 +378,7 @@ static int vrf_finish_output6(struct net *net, struct sock *sk,
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = dst_neigh_output(dst, neigh, skb);
		rcu_read_unlock_bh();
		return ret;
@@ -574,8 +575,10 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
	neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
	if (!IS_ERR(neigh))
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = dst_neigh_output(dst, neigh, skb);
	}

	rcu_read_unlock_bh();
err:
+12 −0
Original line number Diff line number Diff line
@@ -612,6 +612,7 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
 *	@wifi_acked_valid: wifi_acked was set
 *	@wifi_acked: whether frame was acked on wifi or not
 *	@no_fcs:  Request NIC to treat last 4 bytes as Ethernet FCS
 *	@dst_pending_confirm: need to confirm neighbour
  *	@napi_id: id of the NAPI struct this skb came from
 *	@secmark: security marking
 *	@mark: Generic packet mark
@@ -741,6 +742,7 @@ struct sk_buff {
	__u8			csum_level:2;
	__u8			csum_bad:1;

	__u8			dst_pending_confirm:1;
#ifdef CONFIG_IPV6_NDISC_NODETYPE
	__u8			ndisc_nodetype:2;
#endif
@@ -3698,6 +3700,16 @@ static inline bool skb_rx_queue_recorded(const struct sk_buff *skb)
	return skb->queue_mapping != 0;
}

static inline void skb_set_dst_pending_confirm(struct sk_buff *skb, u32 val)
{
	skb->dst_pending_confirm = val;
}

static inline bool skb_get_dst_pending_confirm(const struct sk_buff *skb)
{
	return skb->dst_pending_confirm != 0;
}

static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
{
#ifdef CONFIG_XFRM
+16 −0
Original line number Diff line number Diff line
@@ -35,6 +35,22 @@ static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32
	return n;
}

static inline void __ipv4_confirm_neigh(struct net_device *dev, u32 key)
{
	struct neighbour *n;

	rcu_read_lock_bh();
	n = __ipv4_neigh_lookup_noref(dev, key);
	if (n) {
		unsigned long now = jiffies;

		/* avoid dirtying neighbour */
		if (n->confirmed != now)
			n->confirmed = now;
	}
	rcu_read_unlock_bh();
}

void arp_init(void);
int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg);
void arp_send(int type, int ptype, __be32 dest_ip,
+9 −12
Original line number Diff line number Diff line
@@ -59,8 +59,6 @@ struct dst_entry {
#define DST_XFRM_QUEUE		0x0100
#define DST_METADATA		0x0200

	unsigned short		pending_confirm;

	short			error;

	/* A non-zero value of dst->obsolete forces by-hand validation
@@ -78,6 +76,8 @@ struct dst_entry {
#define DST_OBSOLETE_KILL	-2
	unsigned short		header_len;	/* more space at head required */
	unsigned short		trailer_len;	/* space to reserve at tail */
	unsigned short		__pad3;

#ifdef CONFIG_IP_ROUTE_CLASSID
	__u32			tclassid;
#else
@@ -440,7 +440,6 @@ static inline void dst_rcu_free(struct rcu_head *head)

static inline void dst_confirm(struct dst_entry *dst)
{
	dst->pending_confirm = 1;
}

static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
@@ -448,15 +447,6 @@ static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
{
	const struct hh_cache *hh;

	if (dst->pending_confirm) {
		unsigned long now = jiffies;

		dst->pending_confirm = 0;
		/* avoid dirtying neighbour */
		if (n->confirmed != now)
			n->confirmed = now;
	}

	hh = &n->hh;
	if ((n->nud_state & NUD_CONNECTED) && hh->hh_len)
		return neigh_hh_output(hh, skb);
@@ -477,6 +467,13 @@ static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst
	return IS_ERR(n) ? NULL : n;
}

static inline void dst_confirm_neigh(const struct dst_entry *dst,
				     const void *daddr)
{
	if (dst->ops->confirm_neigh)
		dst->ops->confirm_neigh(dst, daddr);
}

static inline void dst_link_failure(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
+2 −0
Original line number Diff line number Diff line
@@ -33,6 +33,8 @@ struct dst_ops {
	struct neighbour *	(*neigh_lookup)(const struct dst_entry *dst,
						struct sk_buff *skb,
						const void *daddr);
	void			(*confirm_neigh)(const struct dst_entry *dst,
						 const void *daddr);

	struct kmem_cache	*kmem_cachep;

Loading