Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 2367bd99 authored by Daniel Borkmann
Browse files

Merge branch 'bpf-nfp-perf-event-improvements'



Jakub Kicinski says:

====================
This set is focused on improving the performance of perf events
reported from BPF offload.  Perf events can now be received on
packet data queues, which significantly improves the performance
(from a total of 0.5 Msps to 5 Msps per core).  To get to this
performance we need a fast path for control messages which will
operate on raw buffers and recycle them immediately.

Patch 5 replaces the map pointers for perf maps with map IDs.
We look the pointers up in a hashtable anyway, to validate they
are correct, so there is no performance difference.  Map IDs
have the advantage of being easier to understand for users in
case of errors (we no longer print raw pointers to the logs).

Last patch improves info messages about map offload.
====================

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parents 9778cfdf 17082566
Loading
Loading
Loading
Loading
+22 −3
Original line number Diff line number Diff line
@@ -43,8 +43,6 @@
#include "fw.h"
#include "main.h"

#define cmsg_warn(bpf, msg...)	nn_dp_warn(&(bpf)->app->ctrl->dp, msg)

#define NFP_BPF_TAG_ALLOC_SPAN	(U16_MAX / 4)

static bool nfp_bpf_all_tags_busy(struct nfp_app_bpf *bpf)
@@ -441,7 +439,10 @@ void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb)
	}

	if (nfp_bpf_cmsg_get_type(skb) == CMSG_TYPE_BPF_EVENT) {
		nfp_bpf_event_output(bpf, skb);
		if (!nfp_bpf_event_output(bpf, skb->data, skb->len))
			dev_consume_skb_any(skb);
		else
			dev_kfree_skb_any(skb);
		return;
	}

@@ -465,3 +466,21 @@ void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb)
err_free:
	dev_kfree_skb_any(skb);
}

/* nfp_bpf_ctrl_msg_rx_raw() - handle a control message delivered as a raw
 * buffer (fast path, no skb allocated).
 * @app:  NFP app handle, whose ->priv is the BPF app state
 * @data: raw control message bytes
 * @len:  length of @data in bytes
 *
 * Only BPF perf event messages are expected on the raw-buffer path; any
 * other message type is dropped with a warning.  The buffer is owned by
 * the caller and is recycled immediately after return.
 */
void
nfp_bpf_ctrl_msg_rx_raw(struct nfp_app *app, const void *data, unsigned int len)
{
	struct nfp_app_bpf *bpf = app->priv;
	const struct cmsg_hdr *hdr = data;

	/* Reject runts before touching the header.  Note: %u, not %d --
	 * len is unsigned.
	 */
	if (unlikely(len < sizeof(struct cmsg_reply_map_simple))) {
		cmsg_warn(bpf, "cmsg drop - too short %u!\n", len);
		return;
	}

	if (hdr->type == CMSG_TYPE_BPF_EVENT)
		nfp_bpf_event_output(bpf, data, len);
	else
		cmsg_warn(bpf, "cmsg drop - msg type %d with raw buffer!\n",
			  hdr->type);
}
+8 −4
Original line number Diff line number Diff line
@@ -3883,6 +3883,7 @@ static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog)
	struct nfp_insn_meta *meta1, *meta2;
	struct nfp_bpf_map *nfp_map;
	struct bpf_map *map;
	u32 id;

	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
		if (meta1->skip || meta2->skip)
@@ -3894,11 +3895,14 @@ static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog)

		map = (void *)(unsigned long)((u32)meta1->insn.imm |
					      (u64)meta2->insn.imm << 32);
		if (bpf_map_offload_neutral(map))
			continue;
		if (bpf_map_offload_neutral(map)) {
			id = map->id;
		} else {
			nfp_map = map_to_offmap(map)->dev_priv;
			id = nfp_map->tid;
		}

		meta1->insn.imm = nfp_map->tid;
		meta1->insn.imm = id;
		meta2->insn.imm = 0;
	}

+3 −2
Original line number Diff line number Diff line
@@ -45,8 +45,8 @@

const struct rhashtable_params nfp_bpf_maps_neutral_params = {
	.nelem_hint		= 4,
	.key_len		= FIELD_SIZEOF(struct nfp_bpf_neutral_map, ptr),
	.key_offset		= offsetof(struct nfp_bpf_neutral_map, ptr),
	.key_len		= FIELD_SIZEOF(struct bpf_map, id),
	.key_offset		= offsetof(struct nfp_bpf_neutral_map, map_id),
	.head_offset		= offsetof(struct nfp_bpf_neutral_map, l),
	.automatic_shrinking	= true,
};
@@ -490,6 +490,7 @@ const struct nfp_app_type app_bpf = {
	.vnic_free	= nfp_bpf_vnic_free,

	.ctrl_msg_rx	= nfp_bpf_ctrl_msg_rx,
	.ctrl_msg_rx_raw	= nfp_bpf_ctrl_msg_rx_raw,

	.setup_tc	= nfp_bpf_setup_tc,
	.bpf		= nfp_ndo_bpf,
+8 −1
Original line number Diff line number Diff line
@@ -47,6 +47,8 @@
#include "../nfp_asm.h"
#include "fw.h"

#define cmsg_warn(bpf, msg...)	nn_dp_warn(&(bpf)->app->ctrl->dp, msg)

/* For relocation logic use up-most byte of branch instruction as scratch
 * area.  Remember to clear this before sending instructions to HW!
 */
@@ -221,6 +223,7 @@ struct nfp_bpf_map {
/* Record tracking a "neutral" BPF map shared between offloaded programs;
 * stored in the maps_neutral hashtable keyed by the map's ID.
 */
struct nfp_bpf_neutral_map {
	struct rhash_head l;	/* maps_neutral hashtable linkage */
	struct bpf_map *ptr;	/* the kernel BPF map being tracked */
	u32 map_id;	/* map ID; hashtable lookup key */
	u32 count;	/* reference count - presumably the number of
			 * offloaded programs tracking this map (set to 1
			 * on creation; reuse path visible in record
			 * lookup) - TODO confirm increment site
			 */
};

@@ -501,7 +504,11 @@ int nfp_bpf_ctrl_lookup_entry(struct bpf_offloaded_map *offmap,
int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap,
			       void *key, void *next_key);

int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb);
int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data,
			 unsigned int len);

void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb);
void
nfp_bpf_ctrl_msg_rx_raw(struct nfp_app *app, const void *data,
			unsigned int len);
#endif
+38 −25
Original line number Diff line number Diff line
@@ -67,7 +67,7 @@ nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
	ASSERT_RTNL();

	/* Reuse path - other offloaded program is already tracking this map. */
	record = rhashtable_lookup_fast(&bpf->maps_neutral, &map,
	record = rhashtable_lookup_fast(&bpf->maps_neutral, &map->id,
					nfp_bpf_maps_neutral_params);
	if (record) {
		nfp_prog->map_records[nfp_prog->map_records_cnt++] = record;
@@ -89,6 +89,7 @@ nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
	}

	record->ptr = map;
	record->map_id = map->id;
	record->count = 1;

	err = rhashtable_insert_fast(&bpf->maps_neutral, &record->l,
@@ -379,11 +380,23 @@ nfp_bpf_map_alloc(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
			bpf->maps.max_elems - bpf->map_elems_in_use);
		return -ENOMEM;
	}
	if (offmap->map.key_size > bpf->maps.max_key_sz ||
	    offmap->map.value_size > bpf->maps.max_val_sz ||
	    round_up(offmap->map.key_size, 8) +

	if (round_up(offmap->map.key_size, 8) +
	    round_up(offmap->map.value_size, 8) > bpf->maps.max_elem_sz) {
		pr_info("elements don't fit in device constraints\n");
		pr_info("map elements too large: %u, FW max element size (key+value): %u\n",
			round_up(offmap->map.key_size, 8) +
			round_up(offmap->map.value_size, 8),
			bpf->maps.max_elem_sz);
		return -ENOMEM;
	}
	if (offmap->map.key_size > bpf->maps.max_key_sz) {
		pr_info("map key size %u, FW max is %u\n",
			offmap->map.key_size, bpf->maps.max_key_sz);
		return -ENOMEM;
	}
	if (offmap->map.value_size > bpf->maps.max_val_sz) {
		pr_info("map value size %u, FW max is %u\n",
			offmap->map.value_size, bpf->maps.max_val_sz);
		return -ENOMEM;
	}

@@ -453,43 +466,43 @@ nfp_bpf_perf_event_copy(void *dst, const void *src,
	return 0;
}

int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb)
int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data,
			 unsigned int len)
{
	struct cmsg_bpf_event *cbe = (void *)skb->data;
	u32 pkt_size, data_size;
	struct bpf_map *map;
	struct cmsg_bpf_event *cbe = (void *)data;
	struct nfp_bpf_neutral_map *record;
	u32 pkt_size, data_size, map_id;
	u64 map_id_full;

	if (skb->len < sizeof(struct cmsg_bpf_event))
		goto err_drop;
	if (len < sizeof(struct cmsg_bpf_event))
		return -EINVAL;

	pkt_size = be32_to_cpu(cbe->pkt_size);
	data_size = be32_to_cpu(cbe->data_size);
	map = (void *)(unsigned long)be64_to_cpu(cbe->map_ptr);
	map_id_full = be64_to_cpu(cbe->map_ptr);
	map_id = map_id_full;

	if (skb->len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size)
		goto err_drop;
	if (len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size)
		return -EINVAL;
	if (cbe->hdr.ver != CMSG_MAP_ABI_VERSION)
		goto err_drop;
		return -EINVAL;

	rcu_read_lock();
	if (!rhashtable_lookup_fast(&bpf->maps_neutral, &map,
				    nfp_bpf_maps_neutral_params)) {
	record = rhashtable_lookup_fast(&bpf->maps_neutral, &map_id,
					nfp_bpf_maps_neutral_params);
	if (!record || map_id_full > U32_MAX) {
		rcu_read_unlock();
		pr_warn("perf event: dest map pointer %px not recognized, dropping event\n",
			map);
		goto err_drop;
		cmsg_warn(bpf, "perf event: map id %lld (0x%llx) not recognized, dropping event\n",
			  map_id_full, map_id_full);
		return -EINVAL;
	}

	bpf_event_output(map, be32_to_cpu(cbe->cpu_id),
	bpf_event_output(record->ptr, be32_to_cpu(cbe->cpu_id),
			 &cbe->data[round_up(pkt_size, 4)], data_size,
			 cbe->data, pkt_size, nfp_bpf_perf_event_copy);
	rcu_read_unlock();

	dev_consume_skb_any(skb);
	return 0;
err_drop:
	dev_kfree_skb_any(skb);
	return -EINVAL;
}

static int
Loading