Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 74b4bed9 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'net-permit-skb_segment-on-head_frag-frag_list-skb'



Yonghong Song says:

====================
net: permit skb_segment on head_frag frag_list skb

One of our in-house projects, bpf-based NAT, hits a kernel BUG_ON at
function skb_segment(), line 3667. The bpf program attaches to
clsact ingress, calls bpf_skb_change_proto to change protocol
from ipv4 to ipv6 or from ipv6 to ipv4, and then calls bpf_redirect
to send the changed packet out.
 ...
    3665                 while (pos < offset + len) {
    3666                         if (i >= nfrags) {
    3667                                 BUG_ON(skb_headlen(list_skb));
 ...

The triggering input skb has the following properties:
    list_skb = skb->frag_list;
    skb->nfrags != NULL && skb_headlen(list_skb) != 0
and skb_segment() is not able to handle a frag_list skb
if its headlen (list_skb->len - list_skb->data_len) is not 0.

Patch #1 provides a simple solution to avoid BUG_ON. If
list_skb->head_frag is true, its page-backed frag will
be processed before the list_skb->frags.
Patch #2 provides a test case in test_bpf module which
constructs a skb and calls skb_segment() directly. The test
case is able to trigger the BUG_ON without Patch #1.

The patch has been tested in the following setup:
  ipv6_host <-> nat_server <-> ipv4_host
where nat_server has a bpf program doing ipv4<->ipv6
translation and forwarding through clsact hook
bpf_skb_change_proto.

Changelog:
v5 -> v6:
  . Added back missed BUG_ON(!nfrags) for zero
    skb_headlen(skb) case, plus a couple of
    cosmetic changes, from Alexander.
v4 -> v5:
  . Replace local variable head_frag with
    a static inline function skb_head_frag_to_page_desc
    which gets the head_frag on-demand. This makes
    code more readable and also does not increase
    the stack size, from Alexander.
  . Remove the "if(nfrags)" guard for skb_orphan_frags
    and skb_zerocopy_clone as I found that they can
    handle zero-frag skb (with non-zero skb_headlen(skb))
    properly.
  . Properly release segment list from skb_segment()
    in the test, from Eric.
v3 -> v4:
  . Remove dynamic memory allocation and use rewinding
    for both index and frag to remove one branch in fast path,
    from Alexander.
  . Fix a bunch of issues in test_bpf skb_segment() test,
    including proper way to allocate skb, proper function
    argument for skb_add_rx_frag and not freeint skb, etc.,
    from Eric.
v2 -> v3:
  . Use starting frag index -1 (instead of 0) to
    special process head_frag before other frags in the skb,
    from Alexander Duyck.
v1 -> v2:
  . Removed never-hit BUG_ON, spotted by Linyu Yuan.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 03740165 76db8087
Loading
Loading
Loading
Loading
+91 −2
Original line number Diff line number Diff line
@@ -6574,6 +6574,93 @@ static bool exclude_test(int test_id)
	return test_id < test_range[0] || test_id > test_range[1];
}

static __init struct sk_buff *build_test_skb(void)
{
	u32 headroom = NET_SKB_PAD + NET_IP_ALIGN + ETH_HLEN;
	struct sk_buff *skb[2];
	struct page *page[2];
	int i, data_size = 8;

	for (i = 0; i < 2; i++) {
		page[i] = alloc_page(GFP_KERNEL);
		if (!page[i]) {
			if (i == 0)
				goto err_page0;
			else
				goto err_page1;
		}

		/* this will set skb[i]->head_frag */
		skb[i] = dev_alloc_skb(headroom + data_size);
		if (!skb[i]) {
			if (i == 0)
				goto err_skb0;
			else
				goto err_skb1;
		}

		skb_reserve(skb[i], headroom);
		skb_put(skb[i], data_size);
		skb[i]->protocol = htons(ETH_P_IP);
		skb_reset_network_header(skb[i]);
		skb_set_mac_header(skb[i], -ETH_HLEN);

		skb_add_rx_frag(skb[i], 0, page[i], 0, 64, 64);
		// skb_headlen(skb[i]): 8, skb[i]->head_frag = 1
	}

	/* setup shinfo */
	skb_shinfo(skb[0])->gso_size = 1448;
	skb_shinfo(skb[0])->gso_type = SKB_GSO_TCPV4;
	skb_shinfo(skb[0])->gso_type |= SKB_GSO_DODGY;
	skb_shinfo(skb[0])->gso_segs = 0;
	skb_shinfo(skb[0])->frag_list = skb[1];

	/* adjust skb[0]'s len */
	skb[0]->len += skb[1]->len;
	skb[0]->data_len += skb[1]->data_len;
	skb[0]->truesize += skb[1]->truesize;

	return skb[0];

err_skb1:
	__free_page(page[1]);
err_page1:
	kfree_skb(skb[0]);
err_skb0:
	__free_page(page[0]);
err_page0:
	return NULL;
}

static __init int test_skb_segment(void)
{
	netdev_features_t features;
	struct sk_buff *skb, *segs;
	int ret = -1;

	features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM |
		   NETIF_F_IPV6_CSUM;
	features |= NETIF_F_RXCSUM;
	skb = build_test_skb();
	if (!skb) {
		pr_info("%s: failed to build_test_skb", __func__);
		goto done;
	}

	segs = skb_segment(skb, features);
	if (segs) {
		kfree_skb_list(segs);
		ret = 0;
		pr_info("%s: success in skb_segment!", __func__);
	} else {
		pr_info("%s: failed in skb_segment!", __func__);
	}
	kfree_skb(skb);
done:
	return ret;
}

static __init int test_bpf(void)
{
	int i, err_cnt = 0, pass_cnt = 0;
@@ -6632,9 +6719,11 @@ static int __init test_bpf_init(void)
		return ret;

	ret = test_bpf();

	destroy_bpf_tests();
	if (ret)
		return ret;

	return test_skb_segment();
}

static void __exit test_bpf_exit(void)
+22 −5
Original line number Diff line number Diff line
@@ -3460,6 +3460,19 @@ void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
}
EXPORT_SYMBOL_GPL(skb_pull_rcsum);

static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb)
{
	skb_frag_t head_frag;
	struct page *page;

	page = virt_to_head_page(frag_skb->head);
	head_frag.page.p = page;
	head_frag.page_offset = frag_skb->data -
		(unsigned char *)page_address(page);
	head_frag.size = skb_headlen(frag_skb);
	return head_frag;
}

/**
 *	skb_segment - Perform protocol segmentation on skb.
 *	@head_skb: buffer to segment
@@ -3664,15 +3677,19 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,

		while (pos < offset + len) {
			if (i >= nfrags) {
				BUG_ON(skb_headlen(list_skb));

				i = 0;
				nfrags = skb_shinfo(list_skb)->nr_frags;
				frag = skb_shinfo(list_skb)->frags;
				frag_skb = list_skb;

				if (!skb_headlen(list_skb)) {
					BUG_ON(!nfrags);
				} else {
					BUG_ON(!list_skb->head_frag);

					/* to make room for head_frag. */
					i--;
					frag--;
				}
				if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
				    skb_zerocopy_clone(nskb, frag_skb,
						       GFP_ATOMIC))
@@ -3689,7 +3706,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
				goto err;
			}

			*nskb_frag = *frag;
			*nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag;
			__skb_frag_ref(nskb_frag);
			size = skb_frag_size(nskb_frag);