
Commit 7f0b8000 authored by David S. Miller


Daniel Borkmann says:

====================
pull-request: bpf-next 2018-01-07

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Add the start of a framework for extending struct xdp_buff without
   the overhead of populating every member at runtime. The idea is
   to have a new per-queue struct xdp_rxq_info that holds read-mostly
   data (currently the queue number and a pointer to the corresponding
   netdev), set up once at rxqueue configuration time. When an XDP
   program is invoked, struct xdp_buff holds a pointer to struct
   xdp_rxq_info that the BPF program can then walk. A user-facing BPF
   program that uses struct xdp_md for its context can read these
   members directly; the verifier rewrites the context access
   transparently by walking the xdp_rxq_info and net_device pointers
   to load the data, from Jesper.
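
   As a rough sketch of what this enables on the program side (the
   SEC() macro is hand-rolled here for self-containment, and the
   drop-on-queue-0 rule is purely illustrative):

	#include <linux/bpf.h>

	#define SEC(name) __attribute__((section(name), used))

	SEC("xdp")
	int xdp_rxq_demo(struct xdp_md *ctx)
	{
		/* Both loads are rewritten by the verifier into walks
		 * of xdp_buff->rxq (struct xdp_rxq_info) and rxq->dev.
		 */
		__u32 queue   = ctx->rx_queue_index;
		__u32 ifindex = ctx->ingress_ifindex;

		/* e.g. drop everything arriving on queue 0 of the
		 * interface with ifindex 2; purely illustrative.
		 */
		if (ifindex == 2 && queue == 0)
			return XDP_DROP;

		return XDP_PASS;
	}

	char _license[] SEC("license") = "GPL";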

2) Redo the reporting of offload device information to user space
   such that it works in combination with network namespaces. The
   namespace is reported through a device/inode tuple, as is done
   in other subsystems (e.g. perf) to identify a namespace. For
   this to work, ns_get_path() has been generalized such that the
   namespace can be retrieved not only from a specific task (the
   perf case), but also from a callback that deduces the netns
   (ns_common) from a netdevice. bpftool support for the new uapi
   info and extensive test cases for test_offload.py in the BPF
   selftests have been added as well, from Jakub.
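
   For orientation, the device/inode tuple reported in the offload
   info (netns_dev/netns_ino) can be matched from user space against
   a namespace file; a minimal sketch, stat'ing the caller's own
   netns:

	#include <stdio.h>
	#include <sys/stat.h>

	int main(void)
	{
		struct stat st;

		/* The same dev/ino pair is what perf and the BPF
		 * offload info use to identify a namespace.
		 */
		if (stat("/proc/self/ns/net", &st)) {
			perror("stat");
			return 1;
		}
		printf("netns dev=%lu ino=%lu\n",
		       (unsigned long)st.st_dev,
		       (unsigned long)st.st_ino);
		return 0;
	}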

3) Add two bpftool improvements: i) properly report the bpftool
   version such that it corresponds to the version of the kernel
   source tree it was built from, by picking up linux/version.h
   from the source tree instead of the installed one. ii) fix the
   bpftool and bpf_jit_disasm builds with binutils >= 2.29. The
   build breaks because the binutils library changed the signature
   of the function that selects the disassembler. Since this is
   needed in multiple tools, add proper feature detection to the
   tools/build/features infrastructure, from Roman.
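
   The shape of the change that broke the build: binutils >= 2.29
   expects the architecture, endianness and mach to be passed to
   disassembler() explicitly instead of deriving them from the bfd.
   A sketch of a dual-signature call site, keyed off the
   feature-detection define (shown here as DISASM_FOUR_ARGS_SIGNATURE):

	#define PACKAGE "disasm-sketch"	/* bfd.h insists this is defined */
	#include <bfd.h>
	#include <dis-asm.h>

	static disassembler_ftype pick_disassembler(bfd *abfd)
	{
	#ifdef DISASM_FOUR_ARGS_SIGNATURE
		/* binutils >= 2.29 */
		return disassembler(bfd_get_arch(abfd),
				    bfd_big_endian(abfd),
				    bfd_get_mach(abfd), abfd);
	#else
		/* older binutils */
		return disassembler(abfd);
	#endif
	}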

4) Implement the BPF syscall command BPF_MAP_GET_NEXT_KEY for the
   stacktrace map. It was previously unimplemented, but there are
   use cases where user space needs to walk all stacktrace map
   entries, e.g. for dumping or deleting map entries without having
   to close and recreate the map. BPF selftests are added along
   with it, from Yonghong.
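
   A minimal user space walk, assuming map_fd refers to a
   BPF_MAP_TYPE_STACK_TRACE map keyed by u32 stack ids, using the
   bpf_map_get_next_key() wrapper from tools/lib/bpf:

	#include <stdint.h>
	#include <stdio.h>
	#include <bpf/bpf.h>

	static void dump_stack_ids(int map_fd)
	{
		uint32_t key, next_key;
		void *prev = NULL;

		/* A NULL key yields the first entry; the kernel
		 * returns -ENOENT once the last key was handed out.
		 */
		while (bpf_map_get_next_key(map_fd, prev, &next_key) == 0) {
			printf("stack id %u\n", next_key);
			key = next_key;
			prev = &key;
		}
	}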

5) A few follow-up cleanups for the bpftool cgroup code: i) rename
   the cgroup 'list' command to 'show', as we have it for other
   subcommands as well, ii) alias the 'show' command such that
   'list' is still accepted, which is also common practice in
   iproute2, and iii) remove a couple of stray newlines from error
   messages using p_err(), from Jakub.
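
   Usage after the rename (cgroup path illustrative):

	# bpftool cgroup show /sys/fs/cgroup/foo
	# bpftool cgroup list /sys/fs/cgroup/foo    # kept as an alias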

6) Two follow-up cleanups to the sockmap code: i) remove the unused
   bpf_compute_data_end_sk_skb() function and ii) only build the
   sockmap infrastructure when CONFIG_INET is enabled, since it is
   only aware of TCP sockets at this time, from John.
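
   A sketch of the Kbuild shape this implies (guard placement is
   assumed here, not copied from the patch):

	# kernel/bpf/Makefile
	ifeq ($(CONFIG_STREAM_PARSER),y)
	ifeq ($(CONFIG_INET),y)
	obj-$(CONFIG_BPF_SYSCALL) += sockmap.o
	endif
	endif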
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents d0adb51e 9be99bad
+10 −0
@@ -2247,6 +2247,9 @@ static void bnxt_free_rx_rings(struct bnxt *bp)
 		if (rxr->xdp_prog)
 			bpf_prog_put(rxr->xdp_prog);
 
+		if (xdp_rxq_info_is_reg(&rxr->xdp_rxq))
+			xdp_rxq_info_unreg(&rxr->xdp_rxq);
+
 		kfree(rxr->rx_tpa);
 		rxr->rx_tpa = NULL;
 
@@ -2280,6 +2283,10 @@ static int bnxt_alloc_rx_rings(struct bnxt *bp)
 
 		ring = &rxr->rx_ring_struct;
 
+		rc = xdp_rxq_info_reg(&rxr->xdp_rxq, bp->dev, i);
+		if (rc < 0)
+			return rc;
+
 		rc = bnxt_alloc_ring(bp, ring);
 		if (rc)
 			return rc;
@@ -2834,6 +2841,9 @@ void bnxt_set_ring_params(struct bnxt *bp)
 	bp->cp_ring_mask = bp->cp_bit - 1;
 }
 
+/* Changing allocation mode of RX rings.
+ * TODO: Update when extending xdp_rxq_info to support allocation modes.
+ */
 int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode)
 {
 	if (page_mode) {
+2 −0
@@ -23,6 +23,7 @@
 #include <net/devlink.h>
 #include <net/dst_metadata.h>
 #include <net/switchdev.h>
+#include <net/xdp.h>
 
 struct tx_bd {
 	__le32 tx_bd_len_flags_type;
@@ -664,6 +665,7 @@ struct bnxt_rx_ring_info {
 
 	struct bnxt_ring_struct	rx_ring_struct;
 	struct bnxt_ring_struct	rx_agg_ring_struct;
+	struct xdp_rxq_info	xdp_rxq;
 };
 
 struct bnxt_cp_ring_info {
+1 −0
@@ -96,6 +96,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
 	xdp.data = *data_ptr;
 	xdp_set_data_meta_invalid(&xdp);
 	xdp.data_end = *data_ptr + *len;
+	xdp.rxq = &rxr->xdp_rxq;
 	orig_data = xdp.data;
 	mapping = rx_buf->mapping - bp->rx_dma_offset;
 
+7 −4
@@ -521,7 +521,7 @@ static void nicvf_unmap_page(struct nicvf *nic, struct page *page, u64 dma_addr)
 
 static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
 				struct cqe_rx_t *cqe_rx, struct snd_queue *sq,
-				struct sk_buff **skb)
+				struct rcv_queue *rq, struct sk_buff **skb)
 {
 	struct xdp_buff xdp;
 	struct page *page;
@@ -545,6 +545,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
 	xdp.data = (void *)cpu_addr;
 	xdp_set_data_meta_invalid(&xdp);
 	xdp.data_end = xdp.data + len;
+	xdp.rxq = &rq->xdp_rxq;
 	orig_data = xdp.data;
 
 	rcu_read_lock();
@@ -698,7 +699,8 @@ static inline void nicvf_set_rxhash(struct net_device *netdev,
 
 static void nicvf_rcv_pkt_handler(struct net_device *netdev,
 				  struct napi_struct *napi,
-				  struct cqe_rx_t *cqe_rx, struct snd_queue *sq)
+				  struct cqe_rx_t *cqe_rx,
+				  struct snd_queue *sq, struct rcv_queue *rq)
 {
 	struct sk_buff *skb = NULL;
 	struct nicvf *nic = netdev_priv(netdev);
@@ -724,7 +726,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
 	/* For XDP, ignore pkts spanning multiple pages */
 	if (nic->xdp_prog && (cqe_rx->rb_cnt == 1)) {
 		/* Packet consumed by XDP */
-		if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, &skb))
+		if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, rq, &skb))
 			return;
 	} else {
 		skb = nicvf_get_rcv_skb(snic, cqe_rx,
@@ -781,6 +783,7 @@ static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx,
 	struct cqe_rx_t *cq_desc;
 	struct netdev_queue *txq;
 	struct snd_queue *sq = &qs->sq[cq_idx];
+	struct rcv_queue *rq = &qs->rq[cq_idx];
 	unsigned int tx_pkts = 0, tx_bytes = 0, txq_idx;
 
 	spin_lock_bh(&cq->lock);
@@ -811,7 +814,7 @@ static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx,
 
 		switch (cq_desc->cqe_type) {
 		case CQE_TYPE_RX:
-			nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq);
+			nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq, rq);
 			work_done++;
 		break;
 		case CQE_TYPE_SEND:
+4 −0
@@ -760,6 +760,7 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
 
 	if (!rq->enable) {
 		nicvf_reclaim_rcv_queue(nic, qs, qidx);
+		xdp_rxq_info_unreg(&rq->xdp_rxq);
 		return;
 	}
 
@@ -772,6 +773,9 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
 	/* all writes of RBDR data to be loaded into L2 Cache as well*/
 	rq->caching = 1;
 
+	/* Driver have no proper error path for failed XDP RX-queue info reg */
+	WARN_ON(xdp_rxq_info_reg(&rq->xdp_rxq, nic->netdev, qidx) < 0);
+
 	/* Send a mailbox msg to PF to config RQ */
 	mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG;
 	mbx.rq.qs_num = qs->vnic_id;