Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d4069fe6 authored by David S. Miller's avatar David S. Miller
Browse files


Daniel Borkmann says:

====================
pull-request: bpf-next 2018-03-31

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Add raw BPF tracepoint API in order to have a BPF program type that
   can access kernel internal arguments of the tracepoints in their
   raw form similar to kprobes based BPF programs. This infrastructure
   also adds a new BPF_RAW_TRACEPOINT_OPEN command to BPF syscall which
   returns an anon-inode backed fd for the tracepoint object that allows
   for automatic detach of the BPF program resp. unregistering of the
   tracepoint probe on fd release, from Alexei.

2) Add new BPF cgroup hooks at bind() and connect() entry in order to
   allow BPF programs to reject, inspect or modify user space passed
   struct sockaddr, and as well a hook at post bind time once the port
   has been allocated. They are used in FB's container management engine
   for implementing policy, replacing fragile LD_PRELOAD wrapper
   intercepting bind() and connect() calls that only works in limited
   scenarios like glibc based apps but not for other runtimes in
   containerized applications, from Andrey.

3) BPF_F_INGRESS flag support has been added to sockmap programs for
   their redirect helper call bringing it in line with cls_bpf based
   programs. Support is added for both variants of sockmap programs,
   meaning for tx ULP hooks as well as recv skb hooks, from John.

4) Various improvements on BPF side for the nfp driver, besides others
   this work adds BPF map update and delete helper call support from
   the datapath, JITing of 32 and 64 bit XADD instructions as well as
   offload support of bpf_get_prandom_u32() call. Initial implementation
   of nfp packet cache has been tackled that optimizes memory access
   (see merge commit for further details), from Jakub and Jiong.

5) Removal of struct bpf_verifier_env argument from the print_bpf_insn()
   API has been done in order to prepare to use print_bpf_insn() soon
   out of perf tool directly. This makes the print_bpf_insn() API more
   generic and pushes the env into private data. bpftool is adjusted
   as well with the print_bpf_insn() argument removal, from Jiri.

6) Couple of cleanups and prep work for the upcoming BTF (BPF Type
   Format). The latter will reuse the current BPF verifier log as
   well, thus bpf_verifier_log() is further generalized, from Martin.

7) For bpf_getsockopt() and bpf_setsockopt() helpers, IPv4 IP_TOS read
   and write support has been added in similar fashion to existing
   IPv6 IPV6_TCLASS socket option we already have, from Nikita.

8) Fixes in recent sockmap scatterlist API usage, which did not use
   sg_init_table() for initialization thus triggering a BUG_ON() in
   scatterlist API when CONFIG_DEBUG_SG was enabled. This adds and
   uses a small helper sg_init_marker() to properly handle the affected
   cases, from Prashant.

9) Let the BPF core follow IDR code convention and therefore use the
   idr_preload() and idr_preload_end() helpers, which would also help
   idr_alloc_cyclic() under GFP_ATOMIC to better succeed under memory
   pressure, from Shaohua.

10) Last but not least, a spelling fix in an error message for the
    BPF cookie UID helper under BPF sample code, from Colin.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 70ae7222 7828f20e
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1153,7 +1153,7 @@ static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
	cinfo.sdma_ring_size = fd->cq->nentries;
	cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size;

	trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, cinfo);
	trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, &cinfo);
	if (copy_to_user((void __user *)arg, &cinfo, len))
		return -EFAULT;

+6 −6
Original line number Diff line number Diff line
@@ -106,7 +106,7 @@ TRACE_EVENT(hfi1_uctxtdata,
TRACE_EVENT(hfi1_ctxt_info,
	    TP_PROTO(struct hfi1_devdata *dd, unsigned int ctxt,
		     unsigned int subctxt,
		     struct hfi1_ctxt_info cinfo),
		     struct hfi1_ctxt_info *cinfo),
	    TP_ARGS(dd, ctxt, subctxt, cinfo),
	    TP_STRUCT__entry(DD_DEV_ENTRY(dd)
			     __field(unsigned int, ctxt)
@@ -120,11 +120,11 @@ TRACE_EVENT(hfi1_ctxt_info,
	    TP_fast_assign(DD_DEV_ASSIGN(dd);
			    __entry->ctxt = ctxt;
			    __entry->subctxt = subctxt;
			    __entry->egrtids = cinfo.egrtids;
			    __entry->rcvhdrq_cnt = cinfo.rcvhdrq_cnt;
			    __entry->rcvhdrq_size = cinfo.rcvhdrq_entsize;
			    __entry->sdma_ring_size = cinfo.sdma_ring_size;
			    __entry->rcvegr_size = cinfo.rcvegr_size;
			    __entry->egrtids = cinfo->egrtids;
			    __entry->rcvhdrq_cnt = cinfo->rcvhdrq_cnt;
			    __entry->rcvhdrq_size = cinfo->rcvhdrq_entsize;
			    __entry->sdma_ring_size = cinfo->sdma_ring_size;
			    __entry->rcvegr_size = cinfo->rcvegr_size;
			    ),
	    TP_printk("[%s] ctxt %u:%u " CINFO_FMT,
		      __get_str(dev),
+6 −6
Original line number Diff line number Diff line
@@ -218,17 +218,17 @@ nfp_bpf_cmsg_communicate(struct nfp_app_bpf *bpf, struct sk_buff *skb,
		return skb;

	hdr = (struct cmsg_hdr *)skb->data;
	/* 0 reply_size means caller will do the validation */
	if (reply_size && skb->len != reply_size) {
		cmsg_warn(bpf, "cmsg drop - wrong size %d != %d!\n",
			  skb->len, reply_size);
		goto err_free;
	}
	if (hdr->type != __CMSG_REPLY(type)) {
		cmsg_warn(bpf, "cmsg drop - wrong type 0x%02x != 0x%02lx!\n",
			  hdr->type, __CMSG_REPLY(type));
		goto err_free;
	}
	/* 0 reply_size means caller will do the validation */
	if (reply_size && skb->len != reply_size) {
		cmsg_warn(bpf, "cmsg drop - type 0x%02x wrong size %d != %d!\n",
			  type, skb->len, reply_size);
		goto err_free;
	}

	return skb;
err_free:
+1 −0
Original line number Diff line number Diff line
@@ -41,6 +41,7 @@ enum bpf_cap_tlv_type {
	NFP_BPF_CAP_TYPE_FUNC		= 1,
	NFP_BPF_CAP_TYPE_ADJUST_HEAD	= 2,
	NFP_BPF_CAP_TYPE_MAPS		= 3,
	NFP_BPF_CAP_TYPE_RANDOM		= 4,
};

struct nfp_bpf_cap_tlv_func {
+426 −36
Original line number Diff line number Diff line
@@ -103,23 +103,18 @@ nfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off)
/* --- Emitters --- */
static void
__emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
	   u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync, bool indir)
	   u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, enum cmd_ctx_swap ctx,
	   bool indir)
{
	enum cmd_ctx_swap ctx;
	u64 insn;

	if (sync)
		ctx = CMD_CTX_SWAP;
	else
		ctx = CMD_CTX_NO_SWAP;

	insn =	FIELD_PREP(OP_CMD_A_SRC, areg) |
		FIELD_PREP(OP_CMD_CTX, ctx) |
		FIELD_PREP(OP_CMD_B_SRC, breg) |
		FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) |
		FIELD_PREP(OP_CMD_XFER, xfer) |
		FIELD_PREP(OP_CMD_CNT, size) |
		FIELD_PREP(OP_CMD_SIG, sync) |
		FIELD_PREP(OP_CMD_SIG, ctx != CMD_CTX_NO_SWAP) |
		FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) |
		FIELD_PREP(OP_CMD_INDIR, indir) |
		FIELD_PREP(OP_CMD_MODE, mode);
@@ -129,7 +124,7 @@ __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,

static void
emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
	     swreg lreg, swreg rreg, u8 size, bool sync, bool indir)
	     swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx, bool indir)
{
	struct nfp_insn_re_regs reg;
	int err;
@@ -150,22 +145,22 @@ emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
		return;
	}

	__emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync,
	__emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, ctx,
		   indir);
}

static void
emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
	 swreg lreg, swreg rreg, u8 size, bool sync)
	 swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
{
	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, sync, false);
	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, false);
}

static void
emit_cmd_indir(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
	       swreg lreg, swreg rreg, u8 size, bool sync)
	       swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
{
	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, sync, true);
	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, true);
}

static void
@@ -410,7 +405,7 @@ __emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr,
		FIELD_PREP(OP_LCSR_A_SRC, areg) |
		FIELD_PREP(OP_LCSR_B_SRC, breg) |
		FIELD_PREP(OP_LCSR_WRITE, wr) |
		FIELD_PREP(OP_LCSR_ADDR, addr) |
		FIELD_PREP(OP_LCSR_ADDR, addr / 4) |
		FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn);

@@ -438,10 +433,16 @@ static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr)
		return;
	}

	__emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr / 4,
	__emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr,
		    false, reg.src_lmextn);
}

/* CSR value is read in following immed[gpr, 0] */
static void __emit_csr_rd(struct nfp_prog *nfp_prog, u16 addr)
{
	__emit_lcsr(nfp_prog, 0, 0, false, addr, false, false);
}

static void emit_nop(struct nfp_prog *nfp_prog)
{
	__emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0);
@@ -553,6 +554,19 @@ wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len,
	emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true);
}

/* wrp_reg_or_subpart() - load @field_len bytes from low end of @src, or the
 * result to @dst from offset, there is no change on the other bits of @dst.
 */
static void
wrp_reg_or_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src,
		   u8 field_len, u8 offset)
{
	enum shf_sc sc = offset ? SHF_SC_L_SHF : SHF_SC_NONE;
	u8 mask = ((1 << field_len) - 1) << offset;

	emit_ld_field(nfp_prog, dst, mask, src, sc, 32 - offset * 8);
}

static void
addr40_offset(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
	      swreg *rega, swreg *regb)
@@ -597,7 +611,7 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
	/* Memory read from source addr into transfer-in registers. */
	emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP,
		     src_40bit_addr ? CMD_MODE_40b_BA : CMD_MODE_32b, 0,
		     src_base, off, xfer_num - 1, true, len > 32);
		     src_base, off, xfer_num - 1, CMD_CTX_SWAP, len > 32);

	/* Move from transfer-in to transfer-out. */
	for (i = 0; i < xfer_num; i++)
@@ -609,39 +623,39 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
		/* Use single direct_ref write8. */
		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
			 reg_a(meta->paired_st->dst_reg * 2), off, len - 1,
			 true);
			 CMD_CTX_SWAP);
	} else if (len <= 32 && IS_ALIGNED(len, 4)) {
		/* Use single direct_ref write32. */
		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			 reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1,
			 true);
			 CMD_CTX_SWAP);
	} else if (len <= 32) {
		/* Use single indirect_ref write8. */
		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1));
		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
			       reg_a(meta->paired_st->dst_reg * 2), off,
			       len - 1, true);
			       len - 1, CMD_CTX_SWAP);
	} else if (IS_ALIGNED(len, 4)) {
		/* Use single indirect_ref write32. */
		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			       reg_a(meta->paired_st->dst_reg * 2), off,
			       xfer_num - 1, true);
			       xfer_num - 1, CMD_CTX_SWAP);
	} else if (len <= 40) {
		/* Use one direct_ref write32 to write the first 32-bytes, then
		 * another direct_ref write8 to write the remaining bytes.
		 */
		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			 reg_a(meta->paired_st->dst_reg * 2), off, 7,
			 true);
			 CMD_CTX_SWAP);

		off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32,
				      imm_b(nfp_prog));
		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8,
			 reg_a(meta->paired_st->dst_reg * 2), off, len - 33,
			 true);
			 CMD_CTX_SWAP);
	} else {
		/* Use one indirect_ref write32 to write 4-bytes aligned length,
		 * then another direct_ref write8 to write the remaining bytes.
@@ -652,12 +666,12 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2));
		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			       reg_a(meta->paired_st->dst_reg * 2), off,
			       xfer_num - 2, true);
			       xfer_num - 2, CMD_CTX_SWAP);
		new_off = meta->paired_st->off + (xfer_num - 1) * 4;
		off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog));
		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b,
			 xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off,
			 (len & 0x3) - 1, true);
			 (len & 0x3) - 1, CMD_CTX_SWAP);
	}

	/* TODO: The following extra load is to make sure data flow be identical
@@ -718,7 +732,7 @@ data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
	shift = size < 4 ? 4 - size : 0;

	emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
		 pptr_reg(nfp_prog), offset, sz - 1, true);
		 pptr_reg(nfp_prog), offset, sz - 1, CMD_CTX_SWAP);

	i = 0;
	if (shift)
@@ -748,7 +762,7 @@ data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr,
	mask = size < 4 ? GENMASK(size - 1, 0) : 0;

	emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, mode, 0,
		 lreg, rreg, sz / 4 - 1, true);
		 lreg, rreg, sz / 4 - 1, CMD_CTX_SWAP);

	i = 0;
	if (mask)
@@ -828,7 +842,7 @@ data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
		wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i));

	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
		 reg_a(dst_gpr), offset, size - 1, true);
		 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);

	return 0;
}
@@ -842,7 +856,7 @@ data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
		wrp_immed(nfp_prog, reg_xfer(1), imm >> 32);

	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
		 reg_a(dst_gpr), offset, size - 1, true);
		 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);

	return 0;
}
@@ -1339,7 +1353,7 @@ static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
}

static int
map_lookup_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	struct bpf_offloaded_map *offmap;
	struct nfp_bpf_map *nfp_map;
@@ -1353,19 +1367,21 @@ map_lookup_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)

	/* We only have to reload LM0 if the key is not at start of stack */
	lm_off = nfp_prog->stack_depth;
	lm_off += meta->arg2.var_off.value + meta->arg2.off;
	load_lm_ptr = meta->arg2_var_off || lm_off;
	lm_off += meta->arg2.reg.var_off.value + meta->arg2.reg.off;
	load_lm_ptr = meta->arg2.var_off || lm_off;

	/* Set LM0 to start of key */
	if (load_lm_ptr)
		emit_csr_wr(nfp_prog, reg_b(2 * 2), NFP_CSR_ACT_LM_ADDR0);
	if (meta->func_id == BPF_FUNC_map_update_elem)
		emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2);

	/* Load map ID into a register, it should actually fit as an immediate
	 * but in case it doesn't deal with it here, not in the delay slots.
	 */
	tid = ur_load_imm_any(nfp_prog, nfp_map->tid, imm_a(nfp_prog));

	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + BPF_FUNC_map_lookup_elem,
	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
		     2, RELO_BR_HELPER);
	ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;

@@ -1388,6 +1404,18 @@ map_lookup_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
	return 0;
}

static int
nfp_get_prandom_u32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	__emit_csr_rd(nfp_prog, NFP_CSR_PSEUDO_RND_NUM);
	/* CSR value is read in following immed[gpr, 0] */
	emit_immed(nfp_prog, reg_both(0), 0,
		   IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
	emit_immed(nfp_prog, reg_both(1), 0,
		   IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
	return 0;
}

/* --- Callbacks --- */
static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
@@ -1838,6 +1866,128 @@ mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
					 tmp_reg, meta->insn.dst_reg * 2, size);
}

static void
mem_ldx_data_init_pktcache(struct nfp_prog *nfp_prog,
			   struct nfp_insn_meta *meta)
{
	s16 range_start = meta->pkt_cache.range_start;
	s16 range_end = meta->pkt_cache.range_end;
	swreg src_base, off;
	u8 xfer_num, len;
	bool indir;

	off = re_load_imm_any(nfp_prog, range_start, imm_b(nfp_prog));
	src_base = reg_a(meta->insn.src_reg * 2);
	len = range_end - range_start;
	xfer_num = round_up(len, REG_WIDTH) / REG_WIDTH;

	indir = len > 8 * REG_WIDTH;
	/* Setup PREV_ALU for indirect mode. */
	if (indir)
		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));

	/* Cache memory into transfer-in registers. */
	emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base,
		     off, xfer_num - 1, CMD_CTX_SWAP, indir);
}

static int
mem_ldx_data_from_pktcache_unaligned(struct nfp_prog *nfp_prog,
				     struct nfp_insn_meta *meta,
				     unsigned int size)
{
	s16 range_start = meta->pkt_cache.range_start;
	s16 insn_off = meta->insn.off - range_start;
	swreg dst_lo, dst_hi, src_lo, src_mid;
	u8 dst_gpr = meta->insn.dst_reg * 2;
	u8 len_lo = size, len_mid = 0;
	u8 idx = insn_off / REG_WIDTH;
	u8 off = insn_off % REG_WIDTH;

	dst_hi = reg_both(dst_gpr + 1);
	dst_lo = reg_both(dst_gpr);
	src_lo = reg_xfer(idx);

	/* The read length could involve as many as three registers. */
	if (size > REG_WIDTH - off) {
		/* Calculate the part in the second register. */
		len_lo = REG_WIDTH - off;
		len_mid = size - len_lo;

		/* Calculate the part in the third register. */
		if (size > 2 * REG_WIDTH - off)
			len_mid = REG_WIDTH;
	}

	wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off);

	if (!len_mid) {
		wrp_immed(nfp_prog, dst_hi, 0);
		return 0;
	}

	src_mid = reg_xfer(idx + 1);

	if (size <= REG_WIDTH) {
		wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo);
		wrp_immed(nfp_prog, dst_hi, 0);
	} else {
		swreg src_hi = reg_xfer(idx + 2);

		wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid,
				   REG_WIDTH - len_lo, len_lo);
		wrp_reg_subpart(nfp_prog, dst_hi, src_mid, len_lo,
				REG_WIDTH - len_lo);
		wrp_reg_or_subpart(nfp_prog, dst_hi, src_hi, REG_WIDTH - len_lo,
				   len_lo);
	}

	return 0;
}

static int
mem_ldx_data_from_pktcache_aligned(struct nfp_prog *nfp_prog,
				   struct nfp_insn_meta *meta,
				   unsigned int size)
{
	swreg dst_lo, dst_hi, src_lo;
	u8 dst_gpr, idx;

	idx = (meta->insn.off - meta->pkt_cache.range_start) / REG_WIDTH;
	dst_gpr = meta->insn.dst_reg * 2;
	dst_hi = reg_both(dst_gpr + 1);
	dst_lo = reg_both(dst_gpr);
	src_lo = reg_xfer(idx);

	if (size < REG_WIDTH) {
		wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0);
		wrp_immed(nfp_prog, dst_hi, 0);
	} else if (size == REG_WIDTH) {
		wrp_mov(nfp_prog, dst_lo, src_lo);
		wrp_immed(nfp_prog, dst_hi, 0);
	} else {
		swreg src_hi = reg_xfer(idx + 1);

		wrp_mov(nfp_prog, dst_lo, src_lo);
		wrp_mov(nfp_prog, dst_hi, src_hi);
	}

	return 0;
}

static int
mem_ldx_data_from_pktcache(struct nfp_prog *nfp_prog,
			   struct nfp_insn_meta *meta, unsigned int size)
{
	u8 off = meta->insn.off - meta->pkt_cache.range_start;

	if (IS_ALIGNED(off, REG_WIDTH))
		return mem_ldx_data_from_pktcache_aligned(nfp_prog, meta, size);

	return mem_ldx_data_from_pktcache_unaligned(nfp_prog, meta, size);
}

static int
mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	unsigned int size)
@@ -1852,8 +2002,16 @@ mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
			return mem_ldx_skb(nfp_prog, meta, size);
	}

	if (meta->ptr.type == PTR_TO_PACKET)
	if (meta->ptr.type == PTR_TO_PACKET) {
		if (meta->pkt_cache.range_end) {
			if (meta->pkt_cache.do_init)
				mem_ldx_data_init_pktcache(nfp_prog, meta);

			return mem_ldx_data_from_pktcache(nfp_prog, meta, size);
		} else {
			return mem_ldx_data(nfp_prog, meta, size);
		}
	}

	if (meta->ptr.type == PTR_TO_STACK)
		return mem_ldx_stack(nfp_prog, meta, size,
@@ -1982,6 +2140,111 @@ static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
	return mem_stx(nfp_prog, meta, 8);
}

static int
mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64)
{
	u8 dst_gpr = meta->insn.dst_reg * 2;
	u8 src_gpr = meta->insn.src_reg * 2;
	unsigned int full_add, out;
	swreg addra, addrb, off;

	off = ur_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));

	/* We can fit 16 bits into command immediate, if we know the immediate
	 * is guaranteed to either always or never fit into 16 bit we only
	 * generate code to handle that particular case, otherwise generate
	 * code for both.
	 */
	out = nfp_prog_current_offset(nfp_prog);
	full_add = nfp_prog_current_offset(nfp_prog);

	if (meta->insn.off) {
		out += 2;
		full_add += 2;
	}
	if (meta->xadd_maybe_16bit) {
		out += 3;
		full_add += 3;
	}
	if (meta->xadd_over_16bit)
		out += 2 + is64;
	if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
		out += 5;
		full_add += 5;
	}

	/* Generate the branch for choosing add_imm vs add */
	if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
		swreg max_imm = imm_a(nfp_prog);

		wrp_immed(nfp_prog, max_imm, 0xffff);
		emit_alu(nfp_prog, reg_none(),
			 max_imm, ALU_OP_SUB, reg_b(src_gpr));
		emit_alu(nfp_prog, reg_none(),
			 reg_imm(0), ALU_OP_SUB_C, reg_b(src_gpr + 1));
		emit_br(nfp_prog, BR_BLO, full_add, meta->insn.off ? 2 : 0);
		/* defer for add */
	}

	/* If insn has an offset add to the address */
	if (!meta->insn.off) {
		addra = reg_a(dst_gpr);
		addrb = reg_b(dst_gpr + 1);
	} else {
		emit_alu(nfp_prog, imma_a(nfp_prog),
			 reg_a(dst_gpr), ALU_OP_ADD, off);
		emit_alu(nfp_prog, imma_b(nfp_prog),
			 reg_a(dst_gpr + 1), ALU_OP_ADD_C, reg_imm(0));
		addra = imma_a(nfp_prog);
		addrb = imma_b(nfp_prog);
	}

	/* Generate the add_imm if 16 bits are possible */
	if (meta->xadd_maybe_16bit) {
		swreg prev_alu = imm_a(nfp_prog);

		wrp_immed(nfp_prog, prev_alu,
			  FIELD_PREP(CMD_OVE_DATA, 2) |
			  CMD_OVE_LEN |
			  FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2));
		wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2);
		emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0,
			       addra, addrb, 0, CMD_CTX_NO_SWAP);

		if (meta->xadd_over_16bit)
			emit_br(nfp_prog, BR_UNC, out, 0);
	}

	if (!nfp_prog_confirm_current_offset(nfp_prog, full_add))
		return -EINVAL;

	/* Generate the add if 16 bits are not guaranteed */
	if (meta->xadd_over_16bit) {
		emit_cmd(nfp_prog, CMD_TGT_ADD, CMD_MODE_40b_BA, 0,
			 addra, addrb, is64 << 2,
			 is64 ? CMD_CTX_SWAP_DEFER2 : CMD_CTX_SWAP_DEFER1);

		wrp_mov(nfp_prog, reg_xfer(0), reg_a(src_gpr));
		if (is64)
			wrp_mov(nfp_prog, reg_xfer(1), reg_a(src_gpr + 1));
	}

	if (!nfp_prog_confirm_current_offset(nfp_prog, out))
		return -EINVAL;

	return 0;
}

static int mem_xadd4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_xadd(nfp_prog, meta, false);
}

static int mem_xadd8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_xadd(nfp_prog, meta, true);
}

static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	emit_br(nfp_prog, BR_UNC, meta->insn.off, 0);
@@ -2183,7 +2446,11 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
	case BPF_FUNC_xdp_adjust_head:
		return adjust_head(nfp_prog, meta);
	case BPF_FUNC_map_lookup_elem:
		return map_lookup_stack(nfp_prog, meta);
	case BPF_FUNC_map_update_elem:
	case BPF_FUNC_map_delete_elem:
		return map_call_stack_common(nfp_prog, meta);
	case BPF_FUNC_get_prandom_u32:
		return nfp_get_prandom_u32(nfp_prog, meta);
	default:
		WARN_ONCE(1, "verifier allowed unsupported function\n");
		return -EOPNOTSUPP;
@@ -2243,6 +2510,8 @@ static const instr_cb_t instr_cb[256] = {
	[BPF_STX | BPF_MEM | BPF_H] =	mem_stx2,
	[BPF_STX | BPF_MEM | BPF_W] =	mem_stx4,
	[BPF_STX | BPF_MEM | BPF_DW] =	mem_stx8,
	[BPF_STX | BPF_XADD | BPF_W] =	mem_xadd4,
	[BPF_STX | BPF_XADD | BPF_DW] =	mem_xadd8,
	[BPF_ST | BPF_MEM | BPF_B] =	mem_st1,
	[BPF_ST | BPF_MEM | BPF_H] =	mem_st2,
	[BPF_ST | BPF_MEM | BPF_W] =	mem_st4,
@@ -2821,6 +3090,120 @@ static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog)
	}
}

static void nfp_bpf_opt_pkt_cache(struct nfp_prog *nfp_prog)
{
	struct nfp_insn_meta *meta, *range_node = NULL;
	s16 range_start = 0, range_end = 0;
	bool cache_avail = false;
	struct bpf_insn *insn;
	s32 range_ptr_off = 0;
	u32 range_ptr_id = 0;

	list_for_each_entry(meta, &nfp_prog->insns, l) {
		if (meta->flags & FLAG_INSN_IS_JUMP_DST)
			cache_avail = false;

		if (meta->skip)
			continue;

		insn = &meta->insn;

		if (is_mbpf_store_pkt(meta) ||
		    insn->code == (BPF_JMP | BPF_CALL) ||
		    is_mbpf_classic_store_pkt(meta) ||
		    is_mbpf_classic_load(meta)) {
			cache_avail = false;
			continue;
		}

		if (!is_mbpf_load(meta))
			continue;

		if (meta->ptr.type != PTR_TO_PACKET || meta->ldst_gather_len) {
			cache_avail = false;
			continue;
		}

		if (!cache_avail) {
			cache_avail = true;
			if (range_node)
				goto end_current_then_start_new;
			goto start_new;
		}

		/* Check ID to make sure two reads share the same
		 * variable offset against PTR_TO_PACKET, and check OFF
		 * to make sure they also share the same constant
		 * offset.
		 *
		 * OFFs don't really need to be the same, because they
		 * are the constant offsets against PTR_TO_PACKET, so
		 * for different OFFs, we could canonicalize them to
		 * offsets against original packet pointer. We don't
		 * support this.
		 */
		if (meta->ptr.id == range_ptr_id &&
		    meta->ptr.off == range_ptr_off) {
			s16 new_start = range_start;
			s16 end, off = insn->off;
			s16 new_end = range_end;
			bool changed = false;

			if (off < range_start) {
				new_start = off;
				changed = true;
			}

			end = off + BPF_LDST_BYTES(insn);
			if (end > range_end) {
				new_end = end;
				changed = true;
			}

			if (!changed)
				continue;

			if (new_end - new_start <= 64) {
				/* Install new range. */
				range_start = new_start;
				range_end = new_end;
				continue;
			}
		}

end_current_then_start_new:
		range_node->pkt_cache.range_start = range_start;
		range_node->pkt_cache.range_end = range_end;
start_new:
		range_node = meta;
		range_node->pkt_cache.do_init = true;
		range_ptr_id = range_node->ptr.id;
		range_ptr_off = range_node->ptr.off;
		range_start = insn->off;
		range_end = insn->off + BPF_LDST_BYTES(insn);
	}

	if (range_node) {
		range_node->pkt_cache.range_start = range_start;
		range_node->pkt_cache.range_end = range_end;
	}

	list_for_each_entry(meta, &nfp_prog->insns, l) {
		if (meta->skip)
			continue;

		if (is_mbpf_load_pkt(meta) && !meta->ldst_gather_len) {
			if (meta->pkt_cache.do_init) {
				range_start = meta->pkt_cache.range_start;
				range_end = meta->pkt_cache.range_end;
			} else {
				meta->pkt_cache.range_start = range_start;
				meta->pkt_cache.range_end = range_end;
			}
		}
	}
}

static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
{
	nfp_bpf_opt_reg_init(nfp_prog);
@@ -2828,6 +3211,7 @@ static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
	nfp_bpf_opt_ld_mask(nfp_prog);
	nfp_bpf_opt_ld_shift(nfp_prog);
	nfp_bpf_opt_ldst_gather(nfp_prog);
	nfp_bpf_opt_pkt_cache(nfp_prog);

	return 0;
}
@@ -2952,6 +3336,12 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
			case BPF_FUNC_map_lookup_elem:
				val = nfp_prog->bpf->helpers.map_lookup;
				break;
			case BPF_FUNC_map_update_elem:
				val = nfp_prog->bpf->helpers.map_update;
				break;
			case BPF_FUNC_map_delete_elem:
				val = nfp_prog->bpf->helpers.map_delete;
				break;
			default:
				pr_err("relocation of unknown helper %d\n",
				       val);
Loading