Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 51151a16, authored by Eric Dumazet, committed by David S. Miller
Browse files

mlx4: allow order-0 memory allocations in RX path



Signed-off-by: Eric Dumazet <edumazet@google.com>

mlx4 exclusively uses order-2 allocations in RX path, which are
likely to fail under memory pressure.

We therefore drop frames more than needed.

This patch tries order-3, order-2, order-1 and finally order-0
allocations to keep good performance, yet allow allocations if/when
memory gets fragmented.

By using larger pages, and avoiding unnecessary get_page()/put_page()
on compound pages, this patch improves performance as well, lowering
false sharing on struct page.

Also use GFP_KERNEL allocations in initialization path, as allocating 12
MB (390 order-3 pages) can easily fail with GFP_ATOMIC.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Amir Vadai <amirv@mellanox.com>
Acked-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 3bae9db9
Loading
Loading
Loading
Loading
+89 −80
Original line number Original line Diff line number Diff line
@@ -43,41 +43,65 @@


#include "mlx4_en.h"
#include "mlx4_en.h"


/*
 * Allocate and DMA-map one block of pages from which RX fragments of the
 * given frag_info are carved, and record it in @page_alloc.
 *
 * @priv:       device private data; priv->ddev is the device used for the
 *              DMA mapping.
 * @page_alloc: filled in on success (size, page, dma, offset).
 * @frag_info:  fragment geometry (frag_size, frag_stride, frag_align).
 * @_gfp:       base allocation flags supplied by the caller.
 *
 * The allocation starts at MLX4_EN_ALLOC_PREFER_ORDER and falls back to
 * smaller orders each time alloc_pages() fails.
 *
 * Returns 0 on success. Returns -ENOMEM when no order large enough to hold
 * frag_info->frag_size could be allocated, or when the DMA mapping fails.
 */
static int mlx4_alloc_pages(struct mlx4_en_priv *priv,
			    struct mlx4_en_rx_alloc *page_alloc,
			    const struct mlx4_en_frag_info *frag_info,
			    gfp_t _gfp)
{
	int order;
	struct page *page;
	dma_addr_t dma;

	/* No loop condition/step: the body either breaks on success or
	 * returns once the order cannot be lowered any further.
	 */
	for (order = MLX4_EN_ALLOC_PREFER_ORDER; ;) {
		gfp_t gfp = _gfp;

		/* Only multi-page (order > 0) requests get the compound-page
		 * and no-warning flags; order-0 uses the caller's flags as is.
		 */
		if (order)
			gfp |= __GFP_COMP | __GFP_NOWARN;
		page = alloc_pages(gfp, order);
		if (likely(page))
			break;
		/* Try the next smaller order; give up when there is none left
		 * or when that block could not hold a single fragment.
		 */
		if (--order < 0 ||
		    ((PAGE_SIZE << order) < frag_info->frag_size))
			return -ENOMEM;
	}
	/* Map the whole block (PAGE_SIZE << order bytes) for device writes. */
	dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order,
			   PCI_DMA_FROMDEVICE);
	if (dma_mapping_error(priv->ddev, dma)) {
		/* Drop the reference obtained from alloc_pages(). */
		put_page(page);
		return -ENOMEM;
	}
	page_alloc->size = PAGE_SIZE << order;
	page_alloc->page = page;
	page_alloc->dma = dma;
	/* The first fragment starts at the alignment offset of this frag. */
	page_alloc->offset = frag_info->frag_align;
	/* Not doing get_page() for each frag is a big win
	 * on asymmetric workloads.
	 *
	 * The reference count is set up front to size / frag_stride, i.e.
	 * one reference per stride-sized slice of the block.
	 * NOTE(review): offset starts at frag_align rather than 0, so when
	 * frag_align is non-zero the number of fragments actually carved may
	 * be smaller than this count - confirm against the callers.
	 */
	atomic_set(&page->_count, page_alloc->size / frag_info->frag_stride);
	return 0;
}

static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
			       struct mlx4_en_rx_desc *rx_desc,
			       struct mlx4_en_rx_desc *rx_desc,
			       struct mlx4_en_rx_alloc *frags,
			       struct mlx4_en_rx_alloc *frags,
			       struct mlx4_en_rx_alloc *ring_alloc)
			       struct mlx4_en_rx_alloc *ring_alloc,
			       gfp_t gfp)
{
{
	struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS];
	struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS];
	struct mlx4_en_frag_info *frag_info;
	const struct mlx4_en_frag_info *frag_info;
	struct page *page;
	struct page *page;
	dma_addr_t dma;
	dma_addr_t dma;
	int i;
	int i;


	for (i = 0; i < priv->num_frags; i++) {
	for (i = 0; i < priv->num_frags; i++) {
		frag_info = &priv->frag_info[i];
		frag_info = &priv->frag_info[i];
		if (ring_alloc[i].offset == frag_info->last_offset) {
		page_alloc[i] = ring_alloc[i];
			page = alloc_pages(GFP_ATOMIC | __GFP_COMP,
		page_alloc[i].offset += frag_info->frag_stride;
					MLX4_EN_ALLOC_ORDER);
		if (page_alloc[i].offset + frag_info->frag_stride <= ring_alloc[i].size)
			if (!page)
			continue;
				goto out;
		if (mlx4_alloc_pages(priv, &page_alloc[i], frag_info, gfp))
			dma = dma_map_page(priv->ddev, page, 0,
				MLX4_EN_ALLOC_SIZE, PCI_DMA_FROMDEVICE);
			if (dma_mapping_error(priv->ddev, dma)) {
				put_page(page);
			goto out;
			goto out;
	}
	}
			page_alloc[i].page = page;
			page_alloc[i].dma = dma;
			page_alloc[i].offset = frag_info->frag_align;
		} else {
			page_alloc[i].page = ring_alloc[i].page;
			get_page(ring_alloc[i].page);
			page_alloc[i].dma = ring_alloc[i].dma;
			page_alloc[i].offset = ring_alloc[i].offset +
						frag_info->frag_stride;
		}
	}


	for (i = 0; i < priv->num_frags; i++) {
	for (i = 0; i < priv->num_frags; i++) {
		frags[i] = ring_alloc[i];
		frags[i] = ring_alloc[i];
@@ -88,14 +112,16 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,


	return 0;
	return 0;



out:
out:
	while (i--) {
	while (i--) {
		frag_info = &priv->frag_info[i];
		frag_info = &priv->frag_info[i];
		if (ring_alloc[i].offset == frag_info->last_offset)
		if (page_alloc[i].page != ring_alloc[i].page) {
			dma_unmap_page(priv->ddev, page_alloc[i].dma,
			dma_unmap_page(priv->ddev, page_alloc[i].dma,
				MLX4_EN_ALLOC_SIZE, PCI_DMA_FROMDEVICE);
				page_alloc[i].size, PCI_DMA_FROMDEVICE);
		put_page(page_alloc[i].page);
			page = page_alloc[i].page;
			atomic_set(&page->_count, 1);
			put_page(page);
		}
	}
	}
	return -ENOMEM;
	return -ENOMEM;
}
}
@@ -104,12 +130,12 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv,
			      struct mlx4_en_rx_alloc *frags,
			      struct mlx4_en_rx_alloc *frags,
			      int i)
			      int i)
{
{
	struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
	const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];


	if (frags[i].offset == frag_info->last_offset) {
	if (frags[i].offset + frag_info->frag_stride > frags[i].size)
		dma_unmap_page(priv->ddev, frags[i].dma, MLX4_EN_ALLOC_SIZE,
		dma_unmap_page(priv->ddev, frags[i].dma, frags[i].size,
					 PCI_DMA_FROMDEVICE);
					 PCI_DMA_FROMDEVICE);
	}

	if (frags[i].page)
	if (frags[i].page)
		put_page(frags[i].page);
		put_page(frags[i].page);
}
}
@@ -117,35 +143,28 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv,
static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
				  struct mlx4_en_rx_ring *ring)
				  struct mlx4_en_rx_ring *ring)
{
{
	struct mlx4_en_rx_alloc *page_alloc;
	int i;
	int i;
	struct mlx4_en_rx_alloc *page_alloc;


	for (i = 0; i < priv->num_frags; i++) {
	for (i = 0; i < priv->num_frags; i++) {
		page_alloc = &ring->page_alloc[i];
		const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
		page_alloc->page = alloc_pages(GFP_ATOMIC | __GFP_COMP,
					       MLX4_EN_ALLOC_ORDER);
		if (!page_alloc->page)
			goto out;


		page_alloc->dma = dma_map_page(priv->ddev, page_alloc->page, 0,
		if (mlx4_alloc_pages(priv, &ring->page_alloc[i],
					MLX4_EN_ALLOC_SIZE, PCI_DMA_FROMDEVICE);
				     frag_info, GFP_KERNEL))
		if (dma_mapping_error(priv->ddev, page_alloc->dma)) {
			put_page(page_alloc->page);
			page_alloc->page = NULL;
			goto out;
			goto out;
	}
	}
		page_alloc->offset = priv->frag_info[i].frag_align;
		en_dbg(DRV, priv, "Initialized allocator:%d with page:%p\n",
		       i, page_alloc->page);
	}
	return 0;
	return 0;


out:
out:
	while (i--) {
	while (i--) {
		struct page *page;

		page_alloc = &ring->page_alloc[i];
		page_alloc = &ring->page_alloc[i];
		dma_unmap_page(priv->ddev, page_alloc->dma,
		dma_unmap_page(priv->ddev, page_alloc->dma,
				MLX4_EN_ALLOC_SIZE, PCI_DMA_FROMDEVICE);
			       page_alloc->size, PCI_DMA_FROMDEVICE);
		put_page(page_alloc->page);
		page = page_alloc->page;
		atomic_set(&page->_count, 1);
		put_page(page);
		page_alloc->page = NULL;
		page_alloc->page = NULL;
	}
	}
	return -ENOMEM;
	return -ENOMEM;
@@ -158,13 +177,18 @@ static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv,
	int i;
	int i;


	for (i = 0; i < priv->num_frags; i++) {
	for (i = 0; i < priv->num_frags; i++) {
		const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];

		page_alloc = &ring->page_alloc[i];
		page_alloc = &ring->page_alloc[i];
		en_dbg(DRV, priv, "Freeing allocator:%d count:%d\n",
		en_dbg(DRV, priv, "Freeing allocator:%d count:%d\n",
		       i, page_count(page_alloc->page));
		       i, page_count(page_alloc->page));


		dma_unmap_page(priv->ddev, page_alloc->dma,
		dma_unmap_page(priv->ddev, page_alloc->dma,
				MLX4_EN_ALLOC_SIZE, PCI_DMA_FROMDEVICE);
				page_alloc->size, PCI_DMA_FROMDEVICE);
		while (page_alloc->offset + frag_info->frag_stride < page_alloc->size) {
			put_page(page_alloc->page);
			put_page(page_alloc->page);
			page_alloc->offset += frag_info->frag_stride;
		}
		page_alloc->page = NULL;
		page_alloc->page = NULL;
	}
	}
}
}
@@ -195,13 +219,14 @@ static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv,
}
}


static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
				   struct mlx4_en_rx_ring *ring, int index)
				   struct mlx4_en_rx_ring *ring, int index,
				   gfp_t gfp)
{
{
	struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride);
	struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride);
	struct mlx4_en_rx_alloc *frags = ring->rx_info +
	struct mlx4_en_rx_alloc *frags = ring->rx_info +
					(index << priv->log_rx_info);
					(index << priv->log_rx_info);


	return mlx4_en_alloc_frags(priv, rx_desc, frags, ring->page_alloc);
	return mlx4_en_alloc_frags(priv, rx_desc, frags, ring->page_alloc, gfp);
}
}


static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring)
static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring)
@@ -235,7 +260,8 @@ static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv)
			ring = &priv->rx_ring[ring_ind];
			ring = &priv->rx_ring[ring_ind];


			if (mlx4_en_prepare_rx_desc(priv, ring,
			if (mlx4_en_prepare_rx_desc(priv, ring,
						    ring->actual_size)) {
						    ring->actual_size,
						    GFP_KERNEL)) {
				if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) {
				if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) {
					en_err(priv, "Failed to allocate "
					en_err(priv, "Failed to allocate "
						     "enough rx buffers\n");
						     "enough rx buffers\n");
@@ -450,11 +476,11 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
					DMA_FROM_DEVICE);
					DMA_FROM_DEVICE);


		/* Save page reference in skb */
		/* Save page reference in skb */
		get_page(frags[nr].page);
		__skb_frag_set_page(&skb_frags_rx[nr], frags[nr].page);
		__skb_frag_set_page(&skb_frags_rx[nr], frags[nr].page);
		skb_frag_size_set(&skb_frags_rx[nr], frag_info->frag_size);
		skb_frag_size_set(&skb_frags_rx[nr], frag_info->frag_size);
		skb_frags_rx[nr].page_offset = frags[nr].offset;
		skb_frags_rx[nr].page_offset = frags[nr].offset;
		skb->truesize += frag_info->frag_stride;
		skb->truesize += frag_info->frag_stride;
		frags[nr].page = NULL;
	}
	}
	/* Adjust size of last fragment to match actual length */
	/* Adjust size of last fragment to match actual length */
	if (nr > 0)
	if (nr > 0)
@@ -547,7 +573,7 @@ static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
	int index = ring->prod & ring->size_mask;
	int index = ring->prod & ring->size_mask;


	while ((u32) (ring->prod - ring->cons) < ring->actual_size) {
	while ((u32) (ring->prod - ring->cons) < ring->actual_size) {
		if (mlx4_en_prepare_rx_desc(priv, ring, index))
		if (mlx4_en_prepare_rx_desc(priv, ring, index, GFP_ATOMIC))
			break;
			break;
		ring->prod++;
		ring->prod++;
		index = ring->prod & ring->size_mask;
		index = ring->prod & ring->size_mask;
@@ -805,21 +831,7 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
	return done;
	return done;
}
}



static const int frag_sizes[] = {
/* Calculate the last offset position that accommodates a full fragment
 * (assuming fragment size = stride-align) */
static int mlx4_en_last_alloc_offset(struct mlx4_en_priv *priv, u16 stride, u16 align)
{
	u16 res = MLX4_EN_ALLOC_SIZE % stride;
	u16 offset = MLX4_EN_ALLOC_SIZE - stride - res + align;

	en_dbg(DRV, priv, "Calculated last offset for stride:%d align:%d "
			    "res:%d offset:%d\n", stride, align, res, offset);
	return offset;
}


static int frag_sizes[] = {
	FRAG_SZ0,
	FRAG_SZ0,
	FRAG_SZ1,
	FRAG_SZ1,
	FRAG_SZ2,
	FRAG_SZ2,
@@ -847,9 +859,6 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
			priv->frag_info[i].frag_stride =
			priv->frag_info[i].frag_stride =
				ALIGN(frag_sizes[i], SMP_CACHE_BYTES);
				ALIGN(frag_sizes[i], SMP_CACHE_BYTES);
		}
		}
		priv->frag_info[i].last_offset = mlx4_en_last_alloc_offset(
						priv, priv->frag_info[i].frag_stride,
						priv->frag_info[i].frag_align);
		buf_size += priv->frag_info[i].frag_size;
		buf_size += priv->frag_info[i].frag_size;
		i++;
		i++;
	}
	}
@@ -861,13 +870,13 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
	en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d "
	en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d "
		  "num_frags:%d):\n", eff_mtu, priv->num_frags);
		  "num_frags:%d):\n", eff_mtu, priv->num_frags);
	for (i = 0; i < priv->num_frags; i++) {
	for (i = 0; i < priv->num_frags; i++) {
		en_dbg(DRV, priv, "  frag:%d - size:%d prefix:%d align:%d "
		en_err(priv,
				"stride:%d last_offset:%d\n", i,
		       "  frag:%d - size:%d prefix:%d align:%d stride:%d\n",
		       i,
		       priv->frag_info[i].frag_size,
		       priv->frag_info[i].frag_size,
		       priv->frag_info[i].frag_prefix_size,
		       priv->frag_info[i].frag_prefix_size,
		       priv->frag_info[i].frag_align,
		       priv->frag_info[i].frag_align,
				priv->frag_info[i].frag_stride,
		       priv->frag_info[i].frag_stride);
				priv->frag_info[i].last_offset);
	}
	}
}
}


+6 −6
Original line number Original line Diff line number Diff line
@@ -96,7 +96,8 @@


/* Use the maximum between 16384 and a single page */
/* Use the maximum between 16384 and a single page */
#define MLX4_EN_ALLOC_SIZE	PAGE_ALIGN(16384)
#define MLX4_EN_ALLOC_SIZE	PAGE_ALIGN(16384)
#define MLX4_EN_ALLOC_ORDER	get_order(MLX4_EN_ALLOC_SIZE)

#define MLX4_EN_ALLOC_PREFER_ORDER	PAGE_ALLOC_COSTLY_ORDER


/* Receive fragment sizes; we use at most 3 fragments (for 9600 byte MTU
/* Receive fragment sizes; we use at most 3 fragments (for 9600 byte MTU
 * and 4K allocations) */
 * and 4K allocations) */
@@ -236,7 +237,8 @@ struct mlx4_en_tx_desc {
struct mlx4_en_rx_alloc {
struct mlx4_en_rx_alloc {
	struct page	*page;
	struct page	*page;
	dma_addr_t	dma;
	dma_addr_t	dma;
	u16 offset;
	u32		offset;
	u32		size;
};
};


struct mlx4_en_tx_ring {
struct mlx4_en_tx_ring {
@@ -439,8 +441,6 @@ struct mlx4_en_frag_info {
	u16 frag_prefix_size;
	u16 frag_prefix_size;
	u16 frag_stride;
	u16 frag_stride;
	u16 frag_align;
	u16 frag_align;
	u16 last_offset;

};
};


#ifdef CONFIG_MLX4_EN_DCB
#ifdef CONFIG_MLX4_EN_DCB