Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d32aebfd authored by David S. Miller
Browse files

Merge branch 'gso_forward'



Florian Westphal says:

====================
net: ip: push gso skb forwarding handling down the stack

Turns out doing the segmentation in forwarding was not a bright idea,
there are corner-cases where this has unintended side-effects.

This patch pushes the segmentation downwards.

After this, netif_skb_dev_features() function can be removed
again, it was only added to fetch the features of the output device,
we can just use skb->dev after the pushdown.

Tested with following setup:

host -> kvm_router  -> kvm_host
  mtu 1500        mtu 1280

- 'host' has route to kvm_host with locked mtu of 1500
- gso/gro enabled on all interfaces

Did tests with all of following combinations:
- netfilter conntrack off and on on kvm_router
- virtio-net and e1000 driver on kvm_router
- tcp and udp bulk xmit from host to kvm_host

for tcp, I added TCPMSS mangling on kvm_host to make it lie about tcp mss.

Also added a dummy '-t mangle -A POSTROUTING -p udp -f'
rule to make sure no udp fragments are seen in the 'conntrack on'
and 'virtio-net' case.

Also checked (with 'ping -M do -s 1400') that it still sends the wanted
icmp error message when size exceeds 1280.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 418a3156 c1e756bf
Loading
Loading
Loading
Loading
+1 −6
Original line number Diff line number Diff line
@@ -3180,12 +3180,7 @@ void netdev_change_features(struct net_device *dev);
void netif_stacked_transfer_operstate(const struct net_device *rootdev,
					struct net_device *dev);

netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
					 const struct net_device *dev);
static inline netdev_features_t netif_skb_features(struct sk_buff *skb)
{
	return netif_skb_dev_features(skb, skb->dev);
}
netdev_features_t netif_skb_features(struct sk_buff *skb);

static inline bool net_gso_ok(netdev_features_t features, int gso_type)
{
+10 −12
Original line number Diff line number Diff line
@@ -2418,7 +2418,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
 * 2. No high memory really exists on this machine.
 */

static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb)
static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int i;
@@ -2493,7 +2493,6 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
}

static netdev_features_t harmonize_features(struct sk_buff *skb,
					    const struct net_device *dev,
	netdev_features_t features)
{
	int tmp;
@@ -2501,30 +2500,29 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
	if (skb->ip_summed != CHECKSUM_NONE &&
	    !can_checksum_protocol(features, skb_network_protocol(skb, &tmp))) {
		features &= ~NETIF_F_ALL_CSUM;
	} else if (illegal_highdma(dev, skb)) {
	} else if (illegal_highdma(skb->dev, skb)) {
		features &= ~NETIF_F_SG;
	}

	return features;
}

netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
					 const struct net_device *dev)
netdev_features_t netif_skb_features(struct sk_buff *skb)
{
	__be16 protocol = skb->protocol;
	netdev_features_t features = dev->features;
	netdev_features_t features = skb->dev->features;

	if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs)
	if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
		features &= ~NETIF_F_GSO_MASK;

	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
		protocol = veh->h_vlan_encapsulated_proto;
	} else if (!vlan_tx_tag_present(skb)) {
		return harmonize_features(skb, dev, features);
		return harmonize_features(skb, features);
	}

	features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
	features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
					       NETIF_F_HW_VLAN_STAG_TX);

	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
@@ -2532,9 +2530,9 @@ netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
				NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
				NETIF_F_HW_VLAN_STAG_TX;

	return harmonize_features(skb, dev, features);
	return harmonize_features(skb, features);
}
EXPORT_SYMBOL(netif_skb_dev_features);
EXPORT_SYMBOL(netif_skb_features);

int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
			struct netdev_queue *txq)
+0 −50
Original line number Diff line number Diff line
@@ -56,53 +56,6 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
	return true;
}

/* Decide whether a GSO skb being forwarded needs software segmentation:
 * true when its network-layer segment length exceeds the dst MTU.
 * (NOTE(review): quoted from the hunk this commit removes from
 * net/ipv4/ip_forward.c — the check moves down to ip_output.c.)
 */
static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb)
{
	unsigned int mtu;

	/* local_df set, or not a GSO skb: normal MTU handling applies. */
	if (skb->local_df || !skb_is_gso(skb))
		return false;

	/* MTU of the output dst, with forwarding semantics (true). */
	mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true);

	/* if seglen > mtu, do software segmentation for IP fragmentation on
	 * output.  DF bit cannot be set since ip_forward would have sent
	 * icmp error.
	 */
	return skb_gso_network_seglen(skb) > mtu;
}

/* called if GSO skb needs to be fragmented on forward */
/* Segments the GSO skb with the output device's features (GSO masked off
 * so real segments are produced) and transmits each segment via
 * dst_output().  Returns 0 on success, -ENOMEM if segmentation fails,
 * or the first dst_output() error otherwise.
 * (NOTE(review): quoted from the hunk this commit removes; its logic is
 * re-created as ip_finish_output_gso() in ip_output.c.)
 */
static int ip_forward_finish_gso(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	netdev_features_t features;
	struct sk_buff *segs;
	int ret = 0;

	/* Fetch features of the *output* device — the reason
	 * netif_skb_dev_features() existed (removed by this commit). */
	features = netif_skb_dev_features(skb, dst->dev);
	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
	if (IS_ERR(segs)) {
		kfree_skb(skb);
		return -ENOMEM;
	}

	/* Original skb is fully replaced by the segment list. */
	consume_skb(skb);

	/* Send every segment; keep the first error but continue so the
	 * remaining segments are not leaked. */
	do {
		struct sk_buff *nskb = segs->next;
		int err;

		segs->next = NULL;
		err = dst_output(segs);

		if (err && ret == 0)
			ret = err;
		segs = nskb;
	} while (segs);

	return ret;
}

static int ip_forward_finish(struct sk_buff *skb)
{
@@ -114,9 +67,6 @@ static int ip_forward_finish(struct sk_buff *skb)
	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	if (ip_gso_exceeds_dst_mtu(skb))
		return ip_forward_finish_gso(skb);

	return dst_output(skb);
}

+48 −3
Original line number Diff line number Diff line
@@ -211,6 +211,48 @@ static inline int ip_finish_output2(struct sk_buff *skb)
	return -EINVAL;
}

/* Output path for GSO skbs.  Fast path: locally created skbs, or
 * forwarded skbs whose segment length fits the dst MTU, go straight to
 * ip_finish_output2().  Slow path: software-segment the skb and push
 * each segment through ip_fragment() so oversized segments can be
 * IP-fragmented (DF cannot be set here — ip_forward would already have
 * sent an ICMP error).  Returns 0 on success, -ENOMEM if segmentation
 * fails, or the first per-segment error.
 * (NOTE(review): quoted from the hunk this commit adds to
 * net/ipv4/ip_output.c.)
 */
static int ip_finish_output_gso(struct sk_buff *skb)
{
	netdev_features_t features;
	struct sk_buff *segs;
	int ret = 0;

	/* common case: locally created skb or seglen is <= mtu */
	if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) ||
	      skb_gso_network_seglen(skb) <= ip_skb_dst_mtu(skb))
		return ip_finish_output2(skb);

	/* Slowpath -  GSO segment length is exceeding the dst MTU.
	 *
	 * This can happen in two cases:
	 * 1) TCP GRO packet, DF bit not set
	 * 2) skb arrived via virtio-net, we thus get TSO/GSO skbs directly
	 * from host network stack.
	 */
	/* skb->dev is now the right device, so plain netif_skb_features()
	 * suffices; mask GSO so real segments come back. */
	features = netif_skb_features(skb);
	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
	if (IS_ERR(segs)) {
		kfree_skb(skb);
		return -ENOMEM;
	}

	/* Original skb is fully replaced by the segment list. */
	consume_skb(skb);

	/* Fragment-and-send each segment; remember the first error but
	 * keep going so remaining segments are not leaked. */
	do {
		struct sk_buff *nskb = segs->next;
		int err;

		segs->next = NULL;
		err = ip_fragment(segs, ip_finish_output2);

		if (err && ret == 0)
			ret = err;
		segs = nskb;
	} while (segs);

	return ret;
}

static int ip_finish_output(struct sk_buff *skb)
{
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
@@ -220,9 +262,12 @@ static int ip_finish_output(struct sk_buff *skb)
		return dst_output(skb);
	}
#endif
	if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
	if (skb_is_gso(skb))
		return ip_finish_output_gso(skb);

	if (skb->len > ip_skb_dst_mtu(skb))
		return ip_fragment(skb, ip_finish_output2);
	else

	return ip_finish_output2(skb);
}