
Commit 11d16edb authored by Alexei Starovoitov

Merge branch 'xdp_rxq_info'

Jesper Dangaard Brouer says:

====================
V4:
* Added reviewers/acks to patches
* Fix patch desc in i40e that got out-of-sync with code
* Add SPDX license headers for the two new files added in patch 14

V3:
* Fixed bug in virtio_net driver
* Removed export of xdp_rxq_info_init()

V2:
* Changed API exposed to drivers
  - Removed invocation of "init" in drivers, and only call "reg"
    (Suggested by Saeed)
  - Allow "reg" to fail and handle this in drivers
    (Suggested by David Ahern)
* Removed the SINKQ qtype; instead allow registering as "unused"
* Also fixed some drivers during testing on actual HW (noted in patches)

There is a need for XDP to know more about the RX-queue a given XDP
frame arrived on, both for the XDP bpf-prog and for the kernel side.

Instead of extending struct xdp_buff each time new info is needed,
this patchset takes a different approach.  Struct xdp_buff is only
extended with a pointer to a struct xdp_rxq_info (which makes later
extensions easier).  This xdp_rxq_info contains information related
to how the driver has set up the individual RX-queues.  This is
read-mostly information, and all xdp_buff frames (in the driver's
napi_poll) point to the same xdp_rxq_info (per RX-queue).

We stress that this data/cache-line is for read-mostly info.  It is
NOT for dynamic per-packet info; use data_meta for such use-cases.
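
For reference, the info block itself is small.  Roughly (a simplified
sketch of what the patchset adds in include/net/xdp.h and the xdp_buff
in include/linux/filter.h; comments are mine):

	/* Read-mostly, one instance per RX-queue, cacheline aligned
	 * to avoid false-sharing on this perf-critical data.
	 */
	struct xdp_rxq_info {
		struct net_device *dev;	/* RX device */
		u32 queue_index;	/* RX-queue index on that device */
		u32 reg_state;		/* registration state, sanity checks */
	} ____cacheline_aligned;

	/* struct xdp_buff only gains a single pointer: */
	struct xdp_buff {
		void *data;
		void *data_end;
		void *data_meta;
		void *data_hard_start;
		struct xdp_rxq_info *rxq;
	};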

This patchset starts out small, exposing only the ingress_ifindex and
the RX-queue index to the XDP/BPF program.  Access to tangible info
like the ingress ifindex and RX-queue index is fairly easy to
comprehend.  Future use-cases could allow XDP frames to be recycled
back to the originating device driver, by providing info on the RX
device and queue number.
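
For illustration only (not part of the patchset): a minimal XDP
program reading the two newly exposed struct xdp_md fields could look
like this; the SEC() macro comes from bpf_helpers.h as used in
samples/bpf, and the queue-0 drop policy is a made-up example.

	#include <linux/bpf.h>
	#include "bpf_helpers.h"	/* SEC() macro, as in samples/bpf */

	SEC("xdp")
	int xdp_rxq_demo(struct xdp_md *ctx)
	{
		/* Both new fields are loads the verifier rewrites to go
		 * through xdp_buff->rxq:
		 *   ctx->ingress_ifindex == rxq->dev->ifindex
		 *   ctx->rx_queue_index  == rxq->queue_index
		 */
		if (ctx->rx_queue_index == 0)	/* example: drop RX-queue 0 */
			return XDP_DROP;
		return XDP_PASS;
	}

	char _license[] SEC("license") = "GPL";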

As XDP doesn't have driver feature flags, and eBPF code (due to
bpf-tail-calls) cannot determine which XDP driver invoked it, this
patchset has to update every driver that supports XDP.

For driver developers (review individual driver patches!):

The xdp_rxq_info is tied to the driver's RX-ring(s). Whenever an
RX-ring modification requires (temporarily) stopping RX frames, the
xdp_rxq_info should (likely) also be unregistered and re-registered,
especially if the pages in the ring are reallocated. Make sure ethtool
set_channels does the right thing. When replacing the XDP prog,
re-register the xdp_rxq_info if and only if the RX-ring needs to be
changed.
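
In condensed form, the per-ring pattern the driver patches follow is
roughly this (a sketch; "my_ring" and the two functions are made-up
driver names, while the xdp_rxq_info_* calls are the new API):

	struct my_ring {		/* hypothetical driver RX-ring state */
		struct xdp_rxq_info xdp_rxq;
		/* ... rest of the driver's per-ring state ... */
	};

	static int my_rx_ring_alloc(struct my_ring *ring,
				    struct net_device *dev, u16 idx)
	{
		/* "reg" can fail and the error must be handled (V2 change) */
		return xdp_rxq_info_reg(&ring->xdp_rxq, dev, idx);
	}

	static void my_rx_ring_free(struct my_ring *ring)
	{
		/* unregister on teardown, e.g. before reallocating ring pages */
		if (xdp_rxq_info_is_reg(&ring->xdp_rxq))
			xdp_rxq_info_unreg(&ring->xdp_rxq);
	}

In the driver's napi_poll the per-packet cost is then a single pointer
assignment, xdp.rxq = &ring->xdp_rxq, as the bnxt and nicvf hunks
below show.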

I'm Cc'ing the individual driver patches to the registered maintainers.

Testing:

I've only tested the NIC drivers I have hardware for.  The general
test procedure is (DUT = Device Under Test):
 (1) run pktgen script pktgen_sample04_many_flows.sh       (against DUT)
 (2) run samples/bpf program xdp_rxq_info --dev $DEV       (on DUT)
 (3) runtime modify number of NIC queues via ethtool -L    (on DUT)
 (4) runtime modify number of NIC ring-size via ethtool -G (on DUT)

Patchset based on git tree bpf-next (at commit fb982666):
 https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/


====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 5f103c5d 0fca931a

drivers/net/ethernet/broadcom/bnxt/bnxt.c  +10 −0
@@ -2247,6 +2247,9 @@ static void bnxt_free_rx_rings(struct bnxt *bp)
 		if (rxr->xdp_prog)
 			bpf_prog_put(rxr->xdp_prog);
 
+		if (xdp_rxq_info_is_reg(&rxr->xdp_rxq))
+			xdp_rxq_info_unreg(&rxr->xdp_rxq);
+
 		kfree(rxr->rx_tpa);
 		rxr->rx_tpa = NULL;
 
@@ -2280,6 +2283,10 @@ static int bnxt_alloc_rx_rings(struct bnxt *bp)
 
 		ring = &rxr->rx_ring_struct;
 
+		rc = xdp_rxq_info_reg(&rxr->xdp_rxq, bp->dev, i);
+		if (rc < 0)
+			return rc;
+
 		rc = bnxt_alloc_ring(bp, ring);
 		if (rc)
 			return rc;
@@ -2834,6 +2841,9 @@ void bnxt_set_ring_params(struct bnxt *bp)
 	bp->cp_ring_mask = bp->cp_bit - 1;
 }
 
+/* Changing allocation mode of RX rings.
+ * TODO: Update when extending xdp_rxq_info to support allocation modes.
+ */
 int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode)
 {
 	if (page_mode) {

drivers/net/ethernet/broadcom/bnxt/bnxt.h  +2 −0
@@ -23,6 +23,7 @@
 #include <net/devlink.h>
 #include <net/dst_metadata.h>
 #include <net/switchdev.h>
+#include <net/xdp.h>
 
 struct tx_bd {
 	__le32 tx_bd_len_flags_type;
@@ -664,6 +665,7 @@ struct bnxt_rx_ring_info {
 
 	struct bnxt_ring_struct	rx_ring_struct;
 	struct bnxt_ring_struct	rx_agg_ring_struct;
+	struct xdp_rxq_info	xdp_rxq;
 };
 
 struct bnxt_cp_ring_info {

drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c  +1 −0
@@ -96,6 +96,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
 	xdp.data = *data_ptr;
 	xdp_set_data_meta_invalid(&xdp);
 	xdp.data_end = *data_ptr + *len;
+	xdp.rxq = &rxr->xdp_rxq;
 	orig_data = xdp.data;
 	mapping = rx_buf->mapping - bp->rx_dma_offset;
 

drivers/net/ethernet/cavium/thunder/nicvf_main.c  +7 −4
@@ -521,7 +521,7 @@ static void nicvf_unmap_page(struct nicvf *nic, struct page *page, u64 dma_addr)
 
 static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
 				struct cqe_rx_t *cqe_rx, struct snd_queue *sq,
-				struct sk_buff **skb)
+				struct rcv_queue *rq, struct sk_buff **skb)
 {
 	struct xdp_buff xdp;
 	struct page *page;
@@ -545,6 +545,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
 	xdp.data = (void *)cpu_addr;
 	xdp_set_data_meta_invalid(&xdp);
 	xdp.data_end = xdp.data + len;
+	xdp.rxq = &rq->xdp_rxq;
 	orig_data = xdp.data;
 
 	rcu_read_lock();
@@ -698,7 +699,8 @@ static inline void nicvf_set_rxhash(struct net_device *netdev,
 
 static void nicvf_rcv_pkt_handler(struct net_device *netdev,
 				  struct napi_struct *napi,
-				  struct cqe_rx_t *cqe_rx, struct snd_queue *sq)
+				  struct cqe_rx_t *cqe_rx,
+				  struct snd_queue *sq, struct rcv_queue *rq)
 {
 	struct sk_buff *skb = NULL;
 	struct nicvf *nic = netdev_priv(netdev);
@@ -724,7 +726,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
 	/* For XDP, ignore pkts spanning multiple pages */
 	if (nic->xdp_prog && (cqe_rx->rb_cnt == 1)) {
 		/* Packet consumed by XDP */
-		if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, &skb))
+		if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, rq, &skb))
 			return;
 	} else {
 		skb = nicvf_get_rcv_skb(snic, cqe_rx,
@@ -781,6 +783,7 @@ static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx,
 	struct cqe_rx_t *cq_desc;
 	struct netdev_queue *txq;
 	struct snd_queue *sq = &qs->sq[cq_idx];
+	struct rcv_queue *rq = &qs->rq[cq_idx];
 	unsigned int tx_pkts = 0, tx_bytes = 0, txq_idx;
 
 	spin_lock_bh(&cq->lock);
@@ -811,7 +814,7 @@ static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx,
 
 		switch (cq_desc->cqe_type) {
 		case CQE_TYPE_RX:
-			nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq);
+			nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq, rq);
 			work_done++;
 		break;
 		case CQE_TYPE_SEND:

drivers/net/ethernet/cavium/thunder/nicvf_queues.c  +4 −0
@@ -760,6 +760,7 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
 
 	if (!rq->enable) {
 		nicvf_reclaim_rcv_queue(nic, qs, qidx);
+		xdp_rxq_info_unreg(&rq->xdp_rxq);
 		return;
 	}
 
@@ -772,6 +773,9 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
 	/* all writes of RBDR data to be loaded into L2 Cache as well*/
 	rq->caching = 1;
 
+	/* Driver have no proper error path for failed XDP RX-queue info reg */
+	WARN_ON(xdp_rxq_info_reg(&rq->xdp_rxq, nic->netdev, qidx) < 0);
+
 	/* Send a mailbox msg to PF to config RQ */
 	mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG;
 	mbx.rq.qs_num = qs->vnic_id;