Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2576b967 authored by Daniel Borkmann's avatar Daniel Borkmann
Browse files

Merge branch 'bpf-msg-push-data'



John Fastabend says:

====================
This series adds a new helper bpf_msg_push_data to be used by
sk_msg programs. The helper can be used to insert extra bytes into
the message that can then be used by the program as metadata tags
among other things.

The first patch adds the helper, second patch the libbpf support,
and last patch updates test_sockmap to run msg_push_data tests.

v2: rebase after queue map and in filter.c convert int -> u32
====================

Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
parents 5032d079 84fbfe02
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -207,6 +207,11 @@ static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which)
	return &msg->sg.data[which];
}

static inline struct scatterlist sk_msg_elem_cpy(struct sk_msg *msg, int which)
{
	return msg->sg.data[which];
}

static inline struct page *sk_msg_page(struct sk_msg *msg, int which)
{
	return sg_page(sk_msg_elem(msg, which));
+19 −1
Original line number Diff line number Diff line
@@ -2240,6 +2240,23 @@ union bpf_attr {
 *		pointer that was returned from bpf_sk_lookup_xxx\ ().
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
 * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags)
 *	Description
 *		For socket policies, insert *len* bytes into msg at offset
 *		*start*.
 *
 *		If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
 *		*msg* it may want to insert metadata or options into the msg.
 *		This can later be read and used by any of the lower layer BPF
 *		hooks.
 *
 *		This helper may fail if under memory pressure (a malloc
 *		fails) in these cases BPF programs will get an appropriate
 *		error and BPF programs will need to handle them.
 *
 *	Return
 *		0 on success, or a negative error in case of failure.
 */
#define __BPF_FUNC_MAPPER(FN)		\
	FN(unspec),			\
@@ -2331,7 +2348,8 @@ union bpf_attr {
	FN(sk_release),			\
	FN(map_push_elem),		\
	FN(map_pop_elem),		\
	FN(map_peek_elem),
	FN(map_peek_elem),		\
	FN(msg_push_data),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
 * function eBPF program intends to call
+134 −0
Original line number Diff line number Diff line
@@ -2297,6 +2297,137 @@ static const struct bpf_func_proto bpf_msg_pull_data_proto = {
	.arg4_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
	   u32, len, u64, flags)
{
	struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge;
	u32 new, i = 0, l, space, copy = 0, offset = 0;
	u8 *raw, *to, *from;
	struct page *page;

	if (unlikely(flags))
		return -EINVAL;

	/* First find the starting scatterlist element */
	i = msg->sg.start;
	do {
		l = sk_msg_elem(msg, i)->length;

		if (start < offset + l)
			break;
		offset += l;
		sk_msg_iter_var_next(i);
	} while (i != msg->sg.end);

	if (start >= offset + l)
		return -EINVAL;

	space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);

	/* If no space available will fallback to copy, we need at
	 * least one scatterlist elem available to push data into
	 * when start aligns to the beginning of an element or two
	 * when it falls inside an element. We handle the start equals
	 * offset case because its the common case for inserting a
	 * header.
	 */
	if (!space || (space == 1 && start != offset))
		copy = msg->sg.data[i].length;

	page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP,
			   get_order(copy + len));
	if (unlikely(!page))
		return -ENOMEM;

	if (copy) {
		int front, back;

		raw = page_address(page);

		psge = sk_msg_elem(msg, i);
		front = start - offset;
		back = psge->length - front;
		from = sg_virt(psge);

		if (front)
			memcpy(raw, from, front);

		if (back) {
			from += front;
			to = raw + front + len;

			memcpy(to, from, back);
		}

		put_page(sg_page(psge));
	} else if (start - offset) {
		psge = sk_msg_elem(msg, i);
		rsge = sk_msg_elem_cpy(msg, i);

		psge->length = start - offset;
		rsge.length -= psge->length;
		rsge.offset += start;

		sk_msg_iter_var_next(i);
		sg_unmark_end(psge);
		sk_msg_iter_next(msg, end);
	}

	/* Slot(s) to place newly allocated data */
	new = i;

	/* Shift one or two slots as needed */
	if (!copy) {
		sge = sk_msg_elem_cpy(msg, i);

		sk_msg_iter_var_next(i);
		sg_unmark_end(&sge);
		sk_msg_iter_next(msg, end);

		nsge = sk_msg_elem_cpy(msg, i);
		if (rsge.length) {
			sk_msg_iter_var_next(i);
			nnsge = sk_msg_elem_cpy(msg, i);
		}

		while (i != msg->sg.end) {
			msg->sg.data[i] = sge;
			sge = nsge;
			sk_msg_iter_var_next(i);
			if (rsge.length) {
				nsge = nnsge;
				nnsge = sk_msg_elem_cpy(msg, i);
			} else {
				nsge = sk_msg_elem_cpy(msg, i);
			}
		}
	}

	/* Place newly allocated data buffer */
	sk_mem_charge(msg->sk, len);
	msg->sg.size += len;
	msg->sg.copy[new] = false;
	sg_set_page(&msg->sg.data[new], page, len + copy, 0);
	if (rsge.length) {
		get_page(sg_page(&rsge));
		sk_msg_iter_var_next(new);
		msg->sg.data[new] = rsge;
	}

	sk_msg_compute_data_pointers(msg);
	return 0;
}

static const struct bpf_func_proto bpf_msg_push_data_proto = {
	.func		= bpf_msg_push_data,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_ANYTHING,
};

BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
{
	return task_get_classid(skb);
@@ -4854,6 +4985,7 @@ bool bpf_helper_changes_pkt_data(void *func)
	    func == bpf_xdp_adjust_head ||
	    func == bpf_xdp_adjust_meta ||
	    func == bpf_msg_pull_data ||
	    func == bpf_msg_push_data ||
	    func == bpf_xdp_adjust_tail ||
#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
	    func == bpf_lwt_seg6_store_bytes ||
@@ -5130,6 +5262,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
		return &bpf_msg_cork_bytes_proto;
	case BPF_FUNC_msg_pull_data:
		return &bpf_msg_pull_data_proto;
	case BPF_FUNC_msg_push_data:
		return &bpf_msg_push_data_proto;
	case BPF_FUNC_get_local_storage:
		return &bpf_get_local_storage_proto;
	default:
+19 −1
Original line number Diff line number Diff line
@@ -2240,6 +2240,23 @@ union bpf_attr {
 *		pointer that was returned from bpf_sk_lookup_xxx\ ().
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
 * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags)
 *	Description
 *		For socket policies, insert *len* bytes into msg at offset
 *		*start*.
 *
 *		If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
 *		*msg* it may want to insert metadata or options into the msg.
 *		This can later be read and used by any of the lower layer BPF
 *		hooks.
 *
 *		This helper may fail if under memory pressure (a malloc
 *		fails) in these cases BPF programs will get an appropriate
 *		error and BPF programs will need to handle them.
 *
 *	Return
 *		0 on success, or a negative error in case of failure.
 */
#define __BPF_FUNC_MAPPER(FN)		\
	FN(unspec),			\
@@ -2331,7 +2348,8 @@ union bpf_attr {
	FN(sk_release),			\
	FN(map_push_elem),		\
	FN(map_pop_elem),		\
	FN(map_peek_elem),
	FN(map_peek_elem),		\
	FN(msg_push_data),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
 * function eBPF program intends to call
+2 −0
Original line number Diff line number Diff line
@@ -111,6 +111,8 @@ static int (*bpf_msg_cork_bytes)(void *ctx, int len) =
	(void *) BPF_FUNC_msg_cork_bytes;
static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
	(void *) BPF_FUNC_msg_pull_data;
static int (*bpf_msg_push_data)(void *ctx, int start, int end, int flags) =
	(void *) BPF_FUNC_msg_push_data;
static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
	(void *) BPF_FUNC_bind;
static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
Loading