
Commit 2a54003e authored by David S. Miller

Merge branch 'xdp-page_pool-fixes-and-in-flight-accounting'



Jesper Dangaard Brouer says:

====================
xdp: page_pool fixes and in-flight accounting

This patchset fixes the page_pool API and its users, such that drivers can use
it for DMA mapping. A number of places existed where the DMA mapping would not
get released/unmapped; all of these are fixed. This happens e.g. when an
xdp_frame gets converted to an SKB, as the network stack doesn't have any
callback for XDP memory models.

The patchset also addresses a shutdown race condition. Today, removing an XDP
memory model based on page_pool is only delayed by one RCU grace period. This
isn't enough, as redirected xdp_frames can still be in flight on different
queues (remote driver TX, cpumap or veth).

We stress that when drivers use page_pool for DMA mapping, they MUST use one
packet per page. This might change in the future, but more work lies ahead
before we can lift this restriction.

This patchset changes the page_pool API to be stricter, as in-flight page
accounting is added.
====================
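To make the stricter contract concrete, here is a minimal sketch of the rule the cover letter describes, using only helpers added or kept by this patchset; the mydrv_* name is hypothetical:

#include <net/page_pool.h>

/* Hypothetical consumer: every page obtained from a page_pool must
 * leave through the page_pool API so in-flight accounting balances. */
static void mydrv_finish_with_page(struct page_pool *pool,
				   struct page *page, bool to_stack)
{
	if (to_stack) {
		/* Page "leaves" the pool (e.g. attached to an SKB):
		 * drop DMA-mapping state and the in-flight count now;
		 * the stack's eventual put_page() frees the page. */
		page_pool_release_page(pool, page);
	} else {
		/* Page stays with the pool: recycle it directly
		 * (NAPI/softirq context assumed for the direct path). */
		page_pool_recycle_direct(pool, page);
	}
}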

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 9371a56f f71fec47
drivers/net/ethernet/mellanox/mlx5/core/en_main.c  +5 −7
@@ -545,8 +545,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 	}
 	err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
 					 MEM_TYPE_PAGE_POOL, rq->page_pool);
-	if (err)
+	if (err) {
+		page_pool_free(rq->page_pool);
 		goto err_free;
+	}
 
 	for (i = 0; i < wq_sz; i++) {
 		if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
@@ -611,8 +613,6 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 	if (rq->xdp_prog)
 		bpf_prog_put(rq->xdp_prog);
 	xdp_rxq_info_unreg(&rq->xdp_rxq);
-	if (rq->page_pool)
-		page_pool_destroy(rq->page_pool);
 	mlx5_wq_destroy(&rq->wq_ctrl);
 
 	return err;
@@ -625,10 +625,6 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
 	if (rq->xdp_prog)
 		bpf_prog_put(rq->xdp_prog);
 
-	xdp_rxq_info_unreg(&rq->xdp_rxq);
-	if (rq->page_pool)
-		page_pool_destroy(rq->page_pool);
-
 	switch (rq->wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 		kvfree(rq->mpwqe.info);
@@ -645,6 +641,8 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
 
 		mlx5e_page_release(rq, dma_info, false);
 	}
+
+	xdp_rxq_info_unreg(&rq->xdp_rxq);
 	mlx5_wq_destroy(&rq->wq_ctrl);
 }
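Taken together, these hunks change who frees the pool: once xdp_rxq_info_reg_mem_model() succeeds, the pool belongs to the XDP memory-model layer and is torn down through xdp_rxq_info_unreg(), which is why the explicit page_pool_destroy() calls go away, the driver frees the pool itself only on the registration-failure path, and the unreg call moves after the page-release loop. A condensed sketch of the resulting teardown order, with hypothetical mydrv_* names:

/* Illustrative teardown order, mirroring mlx5e_free_rq() above. */
static void mydrv_free_rq(struct mydrv_rq *rq)
{
	/* 1. Return every driver-held page to the pool first, so the
	 *    pool's in-flight count can drop to zero. */
	mydrv_release_all_rx_pages(rq);		/* hypothetical helper */

	/* 2. Unregister last: for MEM_TYPE_PAGE_POOL this hands the
	 *    pool to net/core/xdp.c, which requests shutdown and frees
	 *    it once no pages remain in flight. */
	xdp_rxq_info_unreg(&rq->xdp_rxq);
}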

drivers/net/ethernet/mellanox/mlx5/core/en_rx.c  +2 −1
@@ -248,7 +248,7 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
 	dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0,
 				      PAGE_SIZE, rq->buff.map_dir);
 	if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) {
-		put_page(dma_info->page);
+		page_pool_recycle_direct(rq->page_pool, dma_info->page);
 		dma_info->page = NULL;
 		return -ENOMEM;
 	}
@@ -272,6 +272,7 @@ void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
 		page_pool_recycle_direct(rq->page_pool, dma_info->page);
 	} else {
 		mlx5e_page_dma_unmap(rq, dma_info);
+		page_pool_release_page(rq->page_pool, dma_info->page);
 		put_page(dma_info->page);
 	}
 }
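Both hunks keep the pool's accounting symmetric: a page whose DMA mapping fails never left the pool, so it is recycled rather than put_page()'d, and a page that does leave (handed to the stack) is first disconnected with page_pool_release_page(). A hedged sketch of the second case in a hypothetical RX path; this is not the mlx5 code, and the build_skb() parameters are illustrative:

#include <net/page_pool.h>
#include <linux/skbuff.h>

static struct sk_buff *mydrv_page_to_skb(struct page_pool *pool,
					 struct page *page, u32 len)
{
	struct sk_buff *skb = build_skb(page_address(page), PAGE_SIZE);

	if (unlikely(!skb)) {
		/* Never left the pool: recycle, do not put_page(). */
		page_pool_recycle_direct(pool, page);
		return NULL;
	}
	skb_put(skb, len);
	/* Page now belongs to the SKB/stack: disconnect it from the
	 * pool so the stack's put_page() is the final release. */
	page_pool_release_page(pool, page);
	return skb;
}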
drivers/net/veth.c  +1 −0
@@ -547,6 +547,7 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
 		goto err;
 	}
 
+	xdp_release_frame(frame);
 	xdp_scrub_frame(frame);
 	skb->protocol = eth_type_trans(skb, rq->dev);
 err:
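This one-line veth change is the SKB-conversion fix from the cover letter: once the frame's memory is handed to the network stack, there is no XDP callback left to undo the page_pool state. A sketch of the general pattern for any driver converting a redirected frame; the helper is hypothetical and SKB construction is assumed done by the caller:

#include <net/xdp.h>
#include <linux/etherdevice.h>

static struct sk_buff *mydrv_frame_to_skb(struct xdp_frame *frame,
					  struct sk_buff *skb,
					  struct net_device *dev)
{
	/* skb has been built around frame->data by the caller. */

	/* Release page_pool DMA-mapping/in-flight state explicitly:
	 * the stack only does a plain put_page() on the data page. */
	xdp_release_frame(frame);
	/* Clear xdp_frame fields so stale state cannot be reused. */
	xdp_scrub_frame(frame);

	skb->protocol = eth_type_trans(skb, dev);
	return skb;
}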
include/net/page_pool.h  +60 −9
@@ -16,14 +16,16 @@
  * page_pool_alloc_pages() call.  Drivers should likely use
  * page_pool_dev_alloc_pages() replacing dev_alloc_pages().
  *
- * If page_pool handles DMA mapping (use page->private), then API user
- * is responsible for invoking page_pool_put_page() once.  In-case of
- * elevated refcnt, the DMA state is released, assuming other users of
- * the page will eventually call put_page().
+ * API keeps track of in-flight pages, in-order to let API user know
+ * when it is safe to dealloactor page_pool object.  Thus, API users
+ * must make sure to call page_pool_release_page() when a page is
+ * "leaving" the page_pool.  Or call page_pool_put_page() where
+ * appropiate.  For maintaining correct accounting.
  *
- * If no DMA mapping is done, then it can act as shim-layer that
- * fall-through to alloc_page.  As no state is kept on the page, the
- * regular put_page() call is sufficient.
+ * API user must only call page_pool_put_page() once on a page, as it
+ * will either recycle the page, or in case of elevated refcnt, it
+ * will release the DMA mapping and in-flight state accounting.  We
+ * hope to lift this requirement in the future.
  */
 #ifndef _NET_PAGE_POOL_H
 #define _NET_PAGE_POOL_H
@@ -66,9 +68,10 @@ struct page_pool_params {
 };
 
 struct page_pool {
-	struct rcu_head rcu;
 	struct page_pool_params p;
 
+        u32 pages_state_hold_cnt;
+
 	/*
 	 * Data structure for allocation side
 	 *
@@ -96,6 +99,8 @@ struct page_pool {
 	 * TODO: Implement bulk return pages into this structure.
 	 */
 	struct ptr_ring ring;
+
+	atomic_t pages_state_release_cnt;
 };
 
 struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
@@ -109,7 +114,16 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
 
 struct page_pool *page_pool_create(const struct page_pool_params *params);
 
-void page_pool_destroy(struct page_pool *pool);
+void __page_pool_free(struct page_pool *pool);
+static inline void page_pool_free(struct page_pool *pool)
+{
+	/* When page_pool isn't compiled-in, net/core/xdp.c doesn't
+	 * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
+	 */
+#ifdef CONFIG_PAGE_POOL
+	__page_pool_free(pool);
+#endif
+}
 
 /* Never call this directly, use helpers below */
 void __page_pool_put_page(struct page_pool *pool,
@@ -132,6 +146,43 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
 	__page_pool_put_page(pool, page, true);
 }
 
+/* API user MUST have disconnected alloc-side (not allowed to call
+ * page_pool_alloc_pages()) before calling this.  The free-side can
+ * still run concurrently, to handle in-flight packet-pages.
+ *
+ * A request to shutdown can fail (with false) if there are still
+ * in-flight packet-pages.
+ */
+bool __page_pool_request_shutdown(struct page_pool *pool);
+static inline bool page_pool_request_shutdown(struct page_pool *pool)
+{
+	/* When page_pool isn't compiled-in, net/core/xdp.c doesn't
+	 * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
+	 */
+#ifdef CONFIG_PAGE_POOL
+	return __page_pool_request_shutdown(pool);
+#endif
+}
+
+/* Disconnects a page (from a page_pool).  API users can have a need
+ * to disconnect a page (from a page_pool), to allow it to be used as
+ * a regular page (that will eventually be returned to the normal
+ * page-allocator via put_page).
+ */
+void page_pool_unmap_page(struct page_pool *pool, struct page *page);
+static inline void page_pool_release_page(struct page_pool *pool,
+					  struct page *page)
+{
+#ifdef CONFIG_PAGE_POOL
+	page_pool_unmap_page(pool, page);
+#endif
+}
+
+static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
+{
+	return page->dma_addr;
+}
+
 static inline bool is_page_pool_compiled_in(void)
 {
 #ifdef CONFIG_PAGE_POOL
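The new request_shutdown API makes pool teardown two-phase: quiesce the alloc side, then retry until the free side has drained, with in-flight pages tracked as the gap between the alloc-side pages_state_hold_cnt and the free-side pages_state_release_cnt added above. A minimal sketch of a caller under those assumptions; the in-tree user (net/core/xdp.c) defers this via RCU and deferred work, so the polling loop here is illustrative only:

#include <net/page_pool.h>
#include <linux/delay.h>

static void mydrv_teardown_pool(struct page_pool *pool)
{
	/* Pre-condition: no more page_pool_alloc_pages() calls; the
	 * free side may still return in-flight pages concurrently. */
	while (!page_pool_request_shutdown(pool))
		msleep(100);	/* illustrative back-off, not the in-tree code */

	page_pool_free(pool);	/* safe: no pages remain in flight */
}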
include/net/xdp.h  +15 −0
@@ -129,6 +129,21 @@ void xdp_return_frame(struct xdp_frame *xdpf);
 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
 void xdp_return_buff(struct xdp_buff *xdp);
 
+/* When sending xdp_frame into the network stack, then there is no
+ * return point callback, which is needed to release e.g. DMA-mapping
+ * resources with page_pool.  Thus, have explicit function to release
+ * frame resources.
+ */
+void __xdp_release_frame(void *data, struct xdp_mem_info *mem);
+static inline void xdp_release_frame(struct xdp_frame *xdpf)
+{
+	struct xdp_mem_info *mem = &xdpf->mem;
+
+	/* Curr only page_pool needs this */
+	if (mem->type == MEM_TYPE_PAGE_POOL)
+		__xdp_release_frame(xdpf->data, mem);
+}
+
 int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
 		     struct net_device *dev, u32 queue_index);
 void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
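The inline helper is deliberately cheap: only MEM_TYPE_PAGE_POOL needs the release hook, so other memory types skip the call entirely. For contrast with the existing return API, a sketch of a hypothetical consumer of redirected frames:

#include <net/xdp.h>

static void mydrv_consume_frame(struct xdp_frame *frame, bool to_stack)
{
	if (!to_stack) {
		/* Frame is dropped: hand the whole buffer back to its
		 * memory model (recycles into the page_pool, if any). */
		xdp_return_frame(frame);
		return;
	}
	/* Frame becomes an SKB: the page lives on with the stack, so
	 * only the page_pool DMA/in-flight state is released here. */
	xdp_release_frame(frame);
	/* ... build the SKB around frame->data and pass it up ... */
}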