Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 628a5c56 authored by John Heffner's avatar John Heffner Committed by David S. Miller
Browse files

[INET]: Add IP(V6)_PMTUDISC_RPOBE



Add IP(V6)_PMTUDISC_PROBE value for IP(V6)_MTU_DISCOVER.  This option forces
us not to fragment, but does not make use of the kernel path MTU discovery.
That is, it allows for user-mode MTU probing (or, packetization-layer path
MTU discovery).  This is particularly useful for diagnostic utilities, like
traceroute/tracepath.

Signed-off-by: default avatarJohn Heffner <jheffner@psc.edu>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent b881ef76
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -83,6 +83,7 @@ struct in_addr {
#define IP_PMTUDISC_DONT		0	/* Never send DF frames */
#define IP_PMTUDISC_WANT		1	/* Use per route hints	*/
#define IP_PMTUDISC_DO			2	/* Always DF		*/
#define IP_PMTUDISC_PROBE		3       /* Ignore dst pmtu      */

#define IP_MULTICAST_IF			32
#define IP_MULTICAST_TTL 		33
+1 −0
Original line number Diff line number Diff line
@@ -179,6 +179,7 @@ struct in6_flowlabel_req
#define IPV6_PMTUDISC_DONT		0
#define IPV6_PMTUDISC_WANT		1
#define IPV6_PMTUDISC_DO		2
#define IPV6_PMTUDISC_PROBE		3

/* Flowlabel */
#define IPV6_FLOWLABEL_MGR	32
+15 −5
Original line number Diff line number Diff line
@@ -189,6 +189,14 @@ static inline int ip_finish_output2(struct sk_buff *skb)
	return -EINVAL;
}

static inline int ip_skb_dst_mtu(struct sk_buff *skb)
{
	struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL;

	return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ?
	       skb->dst->dev->mtu : dst_mtu(skb->dst);
}

static inline int ip_finish_output(struct sk_buff *skb)
{
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
@@ -198,7 +206,7 @@ static inline int ip_finish_output(struct sk_buff *skb)
		return dst_output(skb);
	}
#endif
	if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb))
	if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
		return ip_fragment(skb, ip_finish_output2);
	else
		return ip_finish_output2(skb);
@@ -422,7 +430,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
	if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
			  htonl(dst_mtu(&rt->u.dst)));
			  htonl(ip_skb_dst_mtu(skb)));
		kfree_skb(skb);
		return -EMSGSIZE;
	}
@@ -787,7 +795,9 @@ int ip_append_data(struct sock *sk,
			inet->cork.addr = ipc->addr;
		}
		dst_hold(&rt->u.dst);
		inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
		inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
					    rt->u.dst.dev->mtu :
					    dst_mtu(rt->u.dst.path);
		inet->cork.rt = rt;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
@@ -1203,13 +1213,13 @@ int ip_push_pending_frames(struct sock *sk)
	 * to fragment the frame generated here. No matter, what transforms
	 * how transforms change size of the packet, it will come out.
	 */
	if (inet->pmtudisc != IP_PMTUDISC_DO)
	if (inet->pmtudisc < IP_PMTUDISC_DO)
		skb->local_df = 1;

	/* DF bit is set when we want to see DF on outgoing frames.
	 * If local_df is set too, we still allow to fragment this frame
	 * locally. */
	if (inet->pmtudisc == IP_PMTUDISC_DO ||
	if (inet->pmtudisc >= IP_PMTUDISC_DO ||
	    (skb->len <= dst_mtu(&rt->u.dst) &&
	     ip_dont_fragment(sk, &rt->u.dst)))
		df = htons(IP_DF);
+1 −1
Original line number Diff line number Diff line
@@ -542,7 +542,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
		inet->hdrincl = val ? 1 : 0;
		break;
	case IP_MTU_DISCOVER:
		if (val<0 || val>2)
		if (val<0 || val>3)
			goto e_inval;
		inet->pmtudisc = val;
		break;
+12 −3
Original line number Diff line number Diff line
@@ -137,9 +137,17 @@ static int ip6_output2(struct sk_buff *skb)
	return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
}

static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
{
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
	       skb->dst->dev->mtu : dst_mtu(skb->dst);
}

int ip6_output(struct sk_buff *skb)
{
	if ((skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) ||
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
				dst_allfrag(skb->dst))
		return ip6_fragment(skb, ip6_output2);
	else
@@ -566,7 +574,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = dst_mtu(&rt->u.dst);
	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb it not generated by a local socket.  (This last
@@ -1063,7 +1071,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = dst_mtu(rt->u.dst.path);
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
Loading