Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a6ff1a2f authored by David S. Miller
Browse files

Merge branch 'nexthop_exceptions'



These patches implement the final mechanism necessary to really allow
us to go without the route cache in ipv4.

We need a place to have long-term storage of PMTU/redirect information
which is independent of the routes themselves, yet does not get us
back into a situation where we have to write to metrics or anything
like that.

For this we use a "next-hop exception" table in the FIB nexthops.

The one thing I desperately want to avoid is having to create clone
routes in the FIB trie for this purpose, because that is very
expensive.   However, I'm willing to entertain such an idea later
if this current scheme proves to have downsides that the FIB trie
variant would not have.

In order to accommodate any such scheme, we need to be able to
produce a full flow key at PMTU/redirect time.  That required an
adjustment of the interface call-sites used to propagate these events.

For a PMTU/redirect with a fully specified socket, we pass that socket
and use it to produce the flow key.

Otherwise we use a passed in SKB to formulate the key.  There are two
cases that need to be distinguished, ICMP message processing (in which
case the IP header is at skb->data) and output packet processing
(mostly tunnels, and in all such cases the IP header is at ip_hdr(skb)).

We also have to make the code able to handle the case where the dst
itself passed into the dst_ops->{update_pmtu,redirect} method is
invalidated.  This matters for calls from sockets that have cached
that route.  We provide an inet{,6} helper function for this purpose,
and edit SCTP specially since it caches routes at the transport rather
than socket level.

Signed-off-by: David S. Miller <davem@davemloft.net>
parents bd2d0837 4895c771
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1397,7 +1397,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
	int e = skb_queue_empty(&priv->cm.skb_queue);

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	skb_queue_tail(&priv->cm.skb_queue, skb);
	if (e)
+4 −2
Original line number Diff line number Diff line
@@ -24,8 +24,10 @@ struct dst_ops {
					  struct net_device *dev, int how);
	struct dst_entry *	(*negative_advice)(struct dst_entry *);
	void			(*link_failure)(struct sk_buff *);
	void			(*update_pmtu)(struct dst_entry *dst, u32 mtu);
	void			(*redirect)(struct dst_entry *dst, struct sk_buff *skb);
	void			(*update_pmtu)(struct dst_entry *dst, struct sock *sk,
					       struct sk_buff *skb, u32 mtu);
	void			(*redirect)(struct dst_entry *dst, struct sock *sk,
					    struct sk_buff *skb);
	int			(*local_out)(struct sk_buff *skb);
	struct neighbour *	(*neigh_lookup)(const struct dst_entry *dst,
						struct sk_buff *skb,
+2 −0
Original line number Diff line number Diff line
@@ -43,4 +43,6 @@ extern void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
extern void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr);

extern int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl);

/*
 * IPv6 connection-socket PMTU update helper (definition not visible here).
 * Per the commit description, an inet{,6} helper is provided for sockets
 * that have cached a route which the dst_ops update_pmtu/redirect method
 * may invalidate.
 * NOTE(review): presumably returns the dst to use after the update --
 * confirm against the definition, including whether NULL is possible.
 */
extern struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu);
#endif /* _INET6_CONNECTION_SOCK_H */
+2 −0
Original line number Diff line number Diff line
@@ -337,4 +337,6 @@ extern int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname,
				      char __user *optval, int __user *optlen);
extern int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
				      char __user *optval, unsigned int optlen);

/*
 * IPv4 connection-socket PMTU update helper (definition not visible here).
 * Per the commit description, an inet{,6} helper is provided for sockets
 * that have cached a route which the dst_ops update_pmtu/redirect method
 * may invalidate.
 * NOTE(review): presumably returns the dst to use after the update --
 * confirm against the definition, including whether NULL is possible.
 */
extern struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
#endif /* _INET_CONNECTION_SOCK_H */
+18 −0
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@

#include <net/flow.h>
#include <linux/seq_file.h>
#include <linux/rcupdate.h>
#include <net/fib_rules.h>
#include <net/inetpeer.h>

@@ -46,6 +47,22 @@ struct fib_config {

struct fib_info;

/*
 * One entry in a next-hop exception chain.
 *
 * Per the commit description, the exception table gives long-term storage
 * for PMTU/redirect information that is independent of the routes
 * themselves.  Per-field meanings below are inferred from the field names
 * only; the code that reads and writes them is not visible here.
 */
struct fib_nh_exception {
	/* Next entry in the same chain; RCU-annotated pointer. */
	struct fib_nh_exception __rcu	*fnhe_next;
	/* Presumably the destination address this exception applies to -- confirm. */
	__be32				fnhe_daddr;
	/* Presumably the learned path MTU for that destination -- confirm. */
	u32				fnhe_pmtu;
	/*
	 * Presumably the gateway learned from a redirect -- confirm.
	 * NOTE(review): declared as plain u32, whereas fnhe_daddr above and
	 * nh_gw in struct fib_nh are __be32; verify the intended byte order.
	 */
	u32				fnhe_gw;
	/* Presumably the time at which the entry expires -- confirm units. */
	unsigned long			fnhe_expires;
	/* Presumably a last-update/last-use timestamp -- confirm. */
	unsigned long			fnhe_stamp;
};

/*
 * Head of one chain of struct fib_nh_exception entries.  struct fib_nh
 * refers to these through its nh_exceptions pointer.
 */
struct fnhe_hash_bucket {
	/* First entry of the chain; RCU-annotated pointer. */
	struct fib_nh_exception __rcu	*chain;
};

/*
 * Tunables for the next-hop exception table.  Their uses are not visible
 * in this header.
 * NOTE(review): FNHE_HASH_SIZE is presumably the number of
 * struct fnhe_hash_bucket entries behind nh_exceptions, and
 * FNHE_RECLAIM_DEPTH presumably the chain length beyond which an existing
 * entry is reused -- confirm against the users.
 */
#define FNHE_HASH_SIZE		2048
#define FNHE_RECLAIM_DEPTH	5

struct fib_nh {
	struct net_device	*nh_dev;
	struct hlist_node	nh_hash;
@@ -63,6 +80,7 @@ struct fib_nh {
	__be32			nh_gw;
	__be32			nh_saddr;
	int			nh_saddr_genid;
	struct fnhe_hash_bucket	*nh_exceptions;
};

/*
Loading