Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit e52fcb24 authored by Eric Dumazet, committed by David S. Miller
Browse files

bnx2x: uses build_skb() in receive path



bnx2x uses the following formula to compute its rx_buf_sz:

dev->mtu + 2*L1_CACHE_BYTES + 14 + 8 + 8 + 2

Then the core network stack adds NET_SKB_PAD and
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).

Final allocated size for skb head on x86_64 (L1_CACHE_BYTES = 64,
MTU=1500) : 2112 bytes : SLUB/SLAB round this to 4096 bytes.

Since skb truesize is then bigger than SK_MEM_QUANTUM, we have a lot of
false sharing because of mem_reclaim in the UDP stack.

One possible way to halve truesize is to reduce the need by 64 bytes
(2112 -> 2048 bytes).

Instead of allocating a full cache line at the end of packet for
alignment, we can use the fact that skb_shared_info sits at the end of
skb->head, and we can use this room, if we convert bnx2x to new
build_skb() infrastructure.

skb_shared_info will be initialized after the hardware has finished its
transfer, so the final padding can possibly be overwritten.

Using build_skb() also reduces cache line misses in the driver, since we
use cache hot skb instead of cold ones. Number of in-flight sk_buff
structures is lower, they are recycled while still hot.

Performance results :

(820,000 pps on a single-threaded rx UDP benchmark, instead of 720,000 pps)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Eilon Greenstein <eilong@broadcom.com>
CC: Ben Hutchings <bhutchings@solarflare.com>
CC: Tom Herbert <therbert@google.com>
CC: Jamal Hadi Salim <hadi@mojatatu.com>
CC: Stephen Hemminger <shemminger@vyatta.com>
CC: Thomas Graf <tgraf@infradead.org>
CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Acked-by: Eilon Greenstein <eilong@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent b2b5ce9d
Loading
Loading
Loading
Loading
+23 −7
Original line number Original line Diff line number Diff line
@@ -293,8 +293,13 @@ enum {
#define FCOE_TXQ_IDX(bp)	(MAX_ETH_TXQ_IDX(bp))
#define FCOE_TXQ_IDX(bp)	(MAX_ETH_TXQ_IDX(bp))


/* fast path */
/* fast path */
/*
 * This driver uses new build_skb() API :
 * RX ring buffer contains pointer to kmalloc() data only,
 * skb are built only after Hardware filled the frame.
 */
struct sw_rx_bd {
struct sw_rx_bd {
	struct sk_buff	*skb;
	u8		*data;
	DEFINE_DMA_UNMAP_ADDR(mapping);
	DEFINE_DMA_UNMAP_ADDR(mapping);
};
};


@@ -424,8 +429,8 @@ union host_hc_status_block {


struct bnx2x_agg_info {
struct bnx2x_agg_info {
	/*
	/*
	 * First aggregation buffer is an skb, the following - are pages.
	 * First aggregation buffer is a data buffer, the following - are pages.
	 * We will preallocate the skbs for each aggregation when
	 * We will preallocate the data buffer for each aggregation when
	 * we open the interface and will replace the BD at the consumer
	 * we open the interface and will replace the BD at the consumer
	 * with this one when we receive the TPA_START CQE in order to
	 * with this one when we receive the TPA_START CQE in order to
	 * keep the Rx BD ring consistent.
	 * keep the Rx BD ring consistent.
@@ -439,6 +444,7 @@ struct bnx2x_agg_info {
	u16			parsing_flags;
	u16			parsing_flags;
	u16			vlan_tag;
	u16			vlan_tag;
	u16			len_on_bd;
	u16			len_on_bd;
	u32			rxhash;
};
};


#define Q_STATS_OFFSET32(stat_name) \
#define Q_STATS_OFFSET32(stat_name) \
@@ -1187,10 +1193,20 @@ struct bnx2x {
#define ETH_MAX_JUMBO_PACKET_SIZE	9600
#define ETH_MAX_JUMBO_PACKET_SIZE	9600


	/* Max supported alignment is 256 (8 shift) */
	/* Max supported alignment is 256 (8 shift) */
#define BNX2X_RX_ALIGN_SHIFT		((L1_CACHE_SHIFT < 8) ? \
#define BNX2X_RX_ALIGN_SHIFT		min(8, L1_CACHE_SHIFT)
					 L1_CACHE_SHIFT : 8)

	/* FW use 2 Cache lines Alignment for start packet and size  */
	/* FW uses 2 Cache lines Alignment for start packet and size
#define BNX2X_FW_RX_ALIGN		(2 << BNX2X_RX_ALIGN_SHIFT)
	 *
	 * We assume build_skb() uses sizeof(struct skb_shared_info) bytes
	 * at the end of skb->data, to avoid wasting a full cache line.
	 * This reduces memory use (skb->truesize).
	 */
#define BNX2X_FW_RX_ALIGN_START	(1UL << BNX2X_RX_ALIGN_SHIFT)

#define BNX2X_FW_RX_ALIGN_END					\
	max(1UL << BNX2X_RX_ALIGN_SHIFT, 			\
	    SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))

#define BNX2X_PXP_DRAM_ALIGN		(BNX2X_RX_ALIGN_SHIFT - 5)
#define BNX2X_PXP_DRAM_ALIGN		(BNX2X_RX_ALIGN_SHIFT - 5)


	struct host_sp_status_block *def_status_blk;
	struct host_sp_status_block *def_status_blk;
+126 −141
Original line number Original line Diff line number Diff line
@@ -294,8 +294,21 @@ static void bnx2x_update_sge_prod(struct bnx2x_fastpath *fp,
	   fp->last_max_sge, fp->rx_sge_prod);
	   fp->last_max_sge, fp->rx_sge_prod);
}
}


/* Return the Toeplitz hash value reported in a fast-path RX CQE
 * (calculated by HW).
 *
 * @bp:  driver instance; only bp->dev->features is read.
 * @cqe: fast-path RX completion entry to read the hash from.
 *
 * Returns cqe->rss_hash_result converted with le32_to_cpu() when the netdev
 * has NETIF_F_RXHASH set and the CQE status flags carry
 * ETH_FAST_PATH_RX_CQE_RSS_HASH_FLG; returns 0 otherwise.
 * No skb is touched here: the caller stores the returned value itself.
 */
static u32 bnx2x_get_rxhash(const struct bnx2x *bp,
			    const struct eth_fast_path_rx_cqe *cqe)
{
	/* Use the CQE hash only if RX hashing is enabled and the CQE flags it */
	if ((bp->dev->features & NETIF_F_RXHASH) &&
	    (cqe->status_flags & ETH_FAST_PATH_RX_CQE_RSS_HASH_FLG))
		return le32_to_cpu(cqe->rss_hash_result);
	return 0;
}

static void bnx2x_tpa_start(struct bnx2x_fastpath *fp, u16 queue,
static void bnx2x_tpa_start(struct bnx2x_fastpath *fp, u16 queue,
			    struct sk_buff *skb, u16 cons, u16 prod,
			    u16 cons, u16 prod,
			    struct eth_fast_path_rx_cqe *cqe)
			    struct eth_fast_path_rx_cqe *cqe)
{
{
	struct bnx2x *bp = fp->bp;
	struct bnx2x *bp = fp->bp;
@@ -310,9 +323,9 @@ static void bnx2x_tpa_start(struct bnx2x_fastpath *fp, u16 queue,
	if (tpa_info->tpa_state != BNX2X_TPA_STOP)
	if (tpa_info->tpa_state != BNX2X_TPA_STOP)
		BNX2X_ERR("start of bin not in stop [%d]\n", queue);
		BNX2X_ERR("start of bin not in stop [%d]\n", queue);


	/* Try to map an empty skb from the aggregation info  */
	/* Try to map an empty data buffer from the aggregation info  */
	mapping = dma_map_single(&bp->pdev->dev,
	mapping = dma_map_single(&bp->pdev->dev,
				 first_buf->skb->data,
				 first_buf->data + NET_SKB_PAD,
				 fp->rx_buf_size, DMA_FROM_DEVICE);
				 fp->rx_buf_size, DMA_FROM_DEVICE);
	/*
	/*
	 *  ...if it fails - move the skb from the consumer to the producer
	 *  ...if it fails - move the skb from the consumer to the producer
@@ -322,15 +335,15 @@ static void bnx2x_tpa_start(struct bnx2x_fastpath *fp, u16 queue,


	if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
	if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
		/* Move the BD from the consumer to the producer */
		/* Move the BD from the consumer to the producer */
		bnx2x_reuse_rx_skb(fp, cons, prod);
		bnx2x_reuse_rx_data(fp, cons, prod);
		tpa_info->tpa_state = BNX2X_TPA_ERROR;
		tpa_info->tpa_state = BNX2X_TPA_ERROR;
		return;
		return;
	}
	}


	/* move empty skb from pool to prod */
	/* move empty data from pool to prod */
	prod_rx_buf->skb = first_buf->skb;
	prod_rx_buf->data = first_buf->data;
	dma_unmap_addr_set(prod_rx_buf, mapping, mapping);
	dma_unmap_addr_set(prod_rx_buf, mapping, mapping);
	/* point prod_bd to new skb */
	/* point prod_bd to new data */
	prod_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
	prod_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
	prod_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
	prod_bd->addr_lo = cpu_to_le32(U64_LO(mapping));


@@ -344,6 +357,7 @@ static void bnx2x_tpa_start(struct bnx2x_fastpath *fp, u16 queue,
	tpa_info->tpa_state = BNX2X_TPA_START;
	tpa_info->tpa_state = BNX2X_TPA_START;
	tpa_info->len_on_bd = le16_to_cpu(cqe->len_on_bd);
	tpa_info->len_on_bd = le16_to_cpu(cqe->len_on_bd);
	tpa_info->placement_offset = cqe->placement_offset;
	tpa_info->placement_offset = cqe->placement_offset;
	tpa_info->rxhash = bnx2x_get_rxhash(bp, cqe);


#ifdef BNX2X_STOP_ON_ERROR
#ifdef BNX2X_STOP_ON_ERROR
	fp->tpa_queue_used |= (1 << queue);
	fp->tpa_queue_used |= (1 << queue);
@@ -471,11 +485,12 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
{
{
	struct bnx2x_agg_info *tpa_info = &fp->tpa_info[queue];
	struct bnx2x_agg_info *tpa_info = &fp->tpa_info[queue];
	struct sw_rx_bd *rx_buf = &tpa_info->first_buf;
	struct sw_rx_bd *rx_buf = &tpa_info->first_buf;
	u8 pad = tpa_info->placement_offset;
	u32 pad = tpa_info->placement_offset;
	u16 len = tpa_info->len_on_bd;
	u16 len = tpa_info->len_on_bd;
	struct sk_buff *skb = rx_buf->skb;
	struct sk_buff *skb = NULL;
	u8 *data = rx_buf->data;
	/* alloc new skb */
	/* alloc new skb */
	struct sk_buff *new_skb;
	u8 *new_data;
	u8 old_tpa_state = tpa_info->tpa_state;
	u8 old_tpa_state = tpa_info->tpa_state;


	tpa_info->tpa_state = BNX2X_TPA_STOP;
	tpa_info->tpa_state = BNX2X_TPA_STOP;
@@ -486,18 +501,18 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
	if (old_tpa_state == BNX2X_TPA_ERROR)
	if (old_tpa_state == BNX2X_TPA_ERROR)
		goto drop;
		goto drop;


	/* Try to allocate the new skb */
	/* Try to allocate the new data */
	new_skb = netdev_alloc_skb(bp->dev, fp->rx_buf_size);
	new_data = kmalloc(fp->rx_buf_size + NET_SKB_PAD, GFP_ATOMIC);


	/* Unmap skb in the pool anyway, as we are going to change
	/* Unmap skb in the pool anyway, as we are going to change
	   pool entry status to BNX2X_TPA_STOP even if new skb allocation
	   pool entry status to BNX2X_TPA_STOP even if new skb allocation
	   fails. */
	   fails. */
	dma_unmap_single(&bp->pdev->dev, dma_unmap_addr(rx_buf, mapping),
	dma_unmap_single(&bp->pdev->dev, dma_unmap_addr(rx_buf, mapping),
			 fp->rx_buf_size, DMA_FROM_DEVICE);
			 fp->rx_buf_size, DMA_FROM_DEVICE);
	if (likely(new_data))
		skb = build_skb(data);


	if (likely(new_skb)) {
	if (likely(skb)) {
		prefetch(skb);
		prefetch(((char *)(skb)) + L1_CACHE_BYTES);


#ifdef BNX2X_STOP_ON_ERROR
#ifdef BNX2X_STOP_ON_ERROR
		if (pad + len > fp->rx_buf_size) {
		if (pad + len > fp->rx_buf_size) {
@@ -509,8 +524,9 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
		}
		}
#endif
#endif


		skb_reserve(skb, pad);
		skb_reserve(skb, pad + NET_SKB_PAD);
		skb_put(skb, len);
		skb_put(skb, len);
		skb->rxhash = tpa_info->rxhash;


		skb->protocol = eth_type_trans(skb, bp->dev);
		skb->protocol = eth_type_trans(skb, bp->dev);
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -526,8 +542,8 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
		}
		}




		/* put new skb in bin */
		/* put new data in bin */
		rx_buf->skb = new_skb;
		rx_buf->data = new_data;


		return;
		return;
	}
	}
@@ -539,19 +555,6 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
	fp->eth_q_stats.rx_skb_alloc_failed++;
	fp->eth_q_stats.rx_skb_alloc_failed++;
}
}


/* Set Toeplitz hash value in the skb using the value from the
 * CQE (calculated by HW).
 */
static inline void bnx2x_set_skb_rxhash(struct bnx2x *bp, union eth_rx_cqe *cqe,
					struct sk_buff *skb)
{
	/* Set Toeplitz hash from CQE */
	if ((bp->dev->features & NETIF_F_RXHASH) &&
	    (cqe->fast_path_cqe.status_flags &
	     ETH_FAST_PATH_RX_CQE_RSS_HASH_FLG))
		skb->rxhash =
		le32_to_cpu(cqe->fast_path_cqe.rss_hash_result);
}


int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
{
{
@@ -594,6 +597,7 @@ int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
		u8 cqe_fp_flags;
		u8 cqe_fp_flags;
		enum eth_rx_cqe_type cqe_fp_type;
		enum eth_rx_cqe_type cqe_fp_type;
		u16 len, pad;
		u16 len, pad;
		u8 *data;


#ifdef BNX2X_STOP_ON_ERROR
#ifdef BNX2X_STOP_ON_ERROR
		if (unlikely(bp->panic))
		if (unlikely(bp->panic))
@@ -604,13 +608,6 @@ int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
		bd_prod = RX_BD(bd_prod);
		bd_prod = RX_BD(bd_prod);
		bd_cons = RX_BD(bd_cons);
		bd_cons = RX_BD(bd_cons);


		/* Prefetch the page containing the BD descriptor
		   at producer's index. It will be needed when new skb is
		   allocated */
		prefetch((void *)(PAGE_ALIGN((unsigned long)
					     (&fp->rx_desc_ring[bd_prod])) -
				  PAGE_SIZE + 1));

		cqe = &fp->rx_comp_ring[comp_ring_cons];
		cqe = &fp->rx_comp_ring[comp_ring_cons];
		cqe_fp = &cqe->fast_path_cqe;
		cqe_fp = &cqe->fast_path_cqe;
		cqe_fp_flags = cqe_fp->type_error_flags;
		cqe_fp_flags = cqe_fp->type_error_flags;
@@ -626,12 +623,9 @@ int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
		if (unlikely(CQE_TYPE_SLOW(cqe_fp_type))) {
		if (unlikely(CQE_TYPE_SLOW(cqe_fp_type))) {
			bnx2x_sp_event(fp, cqe);
			bnx2x_sp_event(fp, cqe);
			goto next_cqe;
			goto next_cqe;

		}
		/* this is an rx packet */
		} else {
		rx_buf = &fp->rx_buf_ring[bd_cons];
		rx_buf = &fp->rx_buf_ring[bd_cons];
			skb = rx_buf->skb;
		data = rx_buf->data;
			prefetch(skb);


		if (!CQE_TYPE_FAST(cqe_fp_type)) {
		if (!CQE_TYPE_FAST(cqe_fp_type)) {
#ifdef BNX2X_STOP_ON_ERROR
#ifdef BNX2X_STOP_ON_ERROR
@@ -650,15 +644,10 @@ int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
				   "calling tpa_start on queue %d\n",
				   "calling tpa_start on queue %d\n",
				   queue);
				   queue);


					bnx2x_tpa_start(fp, queue, skb,
				bnx2x_tpa_start(fp, queue,
						bd_cons, bd_prod,
						bd_cons, bd_prod,
						cqe_fp);
						cqe_fp);

					/* Set Toeplitz hash for LRO skb */
					bnx2x_set_skb_rxhash(bp, cqe, skb);

				goto next_rx;
				goto next_rx;

			} else {
			} else {
				u16 queue =
				u16 queue =
					cqe->end_agg_cqe.queue_index;
					cqe->end_agg_cqe.queue_index;
@@ -685,8 +674,8 @@ int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
					dma_unmap_addr(rx_buf, mapping),
					dma_unmap_addr(rx_buf, mapping),
					pad + RX_COPY_THRESH,
					pad + RX_COPY_THRESH,
					DMA_FROM_DEVICE);
					DMA_FROM_DEVICE);
			prefetch(((char *)(skb)) + L1_CACHE_BYTES);
		pad += NET_SKB_PAD;

		prefetch(data + pad); /* speedup eth_type_trans() */
		/* is this an error packet? */
		/* is this an error packet? */
		if (unlikely(cqe_fp_flags & ETH_RX_ERROR_FALGS)) {
		if (unlikely(cqe_fp_flags & ETH_RX_ERROR_FALGS)) {
			DP(NETIF_MSG_RX_ERR,
			DP(NETIF_MSG_RX_ERR,
@@ -701,50 +690,43 @@ int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
		 */
		 */
		if ((bp->dev->mtu > ETH_MAX_PACKET_SIZE) &&
		if ((bp->dev->mtu > ETH_MAX_PACKET_SIZE) &&
		    (len <= RX_COPY_THRESH)) {
		    (len <= RX_COPY_THRESH)) {
				struct sk_buff *new_skb;
			skb = netdev_alloc_skb_ip_align(bp->dev, len);

			if (skb == NULL) {
				new_skb = netdev_alloc_skb(bp->dev, len + pad);
				if (new_skb == NULL) {
				DP(NETIF_MSG_RX_ERR,
				DP(NETIF_MSG_RX_ERR,
					   "ERROR  packet dropped "
				   "ERROR  packet dropped because of alloc failure\n");
					   "because of alloc failure\n");
				fp->eth_q_stats.rx_skb_alloc_failed++;
				fp->eth_q_stats.rx_skb_alloc_failed++;
				goto reuse_rx;
				goto reuse_rx;
			}
			}

			memcpy(skb->data, data + pad, len);
				/* aligned copy */
			bnx2x_reuse_rx_data(fp, bd_cons, bd_prod);
				skb_copy_from_linear_data_offset(skb, pad,
		} else {
						    new_skb->data + pad, len);
			if (likely(bnx2x_alloc_rx_data(bp, fp, bd_prod) == 0)) {
				skb_reserve(new_skb, pad);
				skb_put(new_skb, len);

				bnx2x_reuse_rx_skb(fp, bd_cons, bd_prod);

				skb = new_skb;

			} else
			if (likely(bnx2x_alloc_rx_skb(bp, fp, bd_prod) == 0)) {
				dma_unmap_single(&bp->pdev->dev,
				dma_unmap_single(&bp->pdev->dev,
						 dma_unmap_addr(rx_buf, mapping),
						 dma_unmap_addr(rx_buf, mapping),
						 fp->rx_buf_size,
						 fp->rx_buf_size,
						 DMA_FROM_DEVICE);
						 DMA_FROM_DEVICE);
				skb = build_skb(data);
				if (unlikely(!skb)) {
					kfree(data);
					fp->eth_q_stats.rx_skb_alloc_failed++;
					goto next_rx;
				}
				skb_reserve(skb, pad);
				skb_reserve(skb, pad);
				skb_put(skb, len);

			} else {
			} else {
				DP(NETIF_MSG_RX_ERR,
				DP(NETIF_MSG_RX_ERR,
				   "ERROR  packet dropped because "
				   "ERROR  packet dropped because "
				   "of alloc failure\n");
				   "of alloc failure\n");
				fp->eth_q_stats.rx_skb_alloc_failed++;
				fp->eth_q_stats.rx_skb_alloc_failed++;
reuse_rx:
reuse_rx:
				bnx2x_reuse_rx_skb(fp, bd_cons, bd_prod);
				bnx2x_reuse_rx_data(fp, bd_cons, bd_prod);
				goto next_rx;
				goto next_rx;
			}
			}


			skb_put(skb, len);
			skb->protocol = eth_type_trans(skb, bp->dev);
			skb->protocol = eth_type_trans(skb, bp->dev);


			/* Set Toeplitz hash for a none-LRO skb */
			/* Set Toeplitz hash for a none-LRO skb */
			bnx2x_set_skb_rxhash(bp, cqe, skb);
			skb->rxhash = bnx2x_get_rxhash(bp, cqe_fp);


			skb_checksum_none_assert(skb);
			skb_checksum_none_assert(skb);


@@ -767,7 +749,7 @@ int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)




next_rx:
next_rx:
		rx_buf->skb = NULL;
		rx_buf->data = NULL;


		bd_cons = NEXT_RX_IDX(bd_cons);
		bd_cons = NEXT_RX_IDX(bd_cons);
		bd_prod = NEXT_RX_IDX(bd_prod);
		bd_prod = NEXT_RX_IDX(bd_prod);
@@ -1013,9 +995,9 @@ void bnx2x_init_rx_rings(struct bnx2x *bp)
				struct sw_rx_bd *first_buf =
				struct sw_rx_bd *first_buf =
					&tpa_info->first_buf;
					&tpa_info->first_buf;


				first_buf->skb = netdev_alloc_skb(bp->dev,
				first_buf->data = kmalloc(fp->rx_buf_size + NET_SKB_PAD,
						       fp->rx_buf_size);
							  GFP_ATOMIC);
				if (!first_buf->skb) {
				if (!first_buf->data) {
					BNX2X_ERR("Failed to allocate TPA "
					BNX2X_ERR("Failed to allocate TPA "
						  "skb pool for queue[%d] - "
						  "skb pool for queue[%d] - "
						  "disabling TPA on this "
						  "disabling TPA on this "
@@ -1118,16 +1100,16 @@ static void bnx2x_free_rx_bds(struct bnx2x_fastpath *fp)


	for (i = 0; i < NUM_RX_BD; i++) {
	for (i = 0; i < NUM_RX_BD; i++) {
		struct sw_rx_bd *rx_buf = &fp->rx_buf_ring[i];
		struct sw_rx_bd *rx_buf = &fp->rx_buf_ring[i];
		struct sk_buff *skb = rx_buf->skb;
		u8 *data = rx_buf->data;


		if (skb == NULL)
		if (data == NULL)
			continue;
			continue;
		dma_unmap_single(&bp->pdev->dev,
		dma_unmap_single(&bp->pdev->dev,
				 dma_unmap_addr(rx_buf, mapping),
				 dma_unmap_addr(rx_buf, mapping),
				 fp->rx_buf_size, DMA_FROM_DEVICE);
				 fp->rx_buf_size, DMA_FROM_DEVICE);


		rx_buf->skb = NULL;
		rx_buf->data = NULL;
		dev_kfree_skb(skb);
		kfree(data);
	}
	}
}
}


@@ -1509,6 +1491,7 @@ static inline void bnx2x_set_rx_buf_size(struct bnx2x *bp)


	for_each_queue(bp, i) {
	for_each_queue(bp, i) {
		struct bnx2x_fastpath *fp = &bp->fp[i];
		struct bnx2x_fastpath *fp = &bp->fp[i];
		u32 mtu;


		/* Always use a mini-jumbo MTU for the FCoE L2 ring */
		/* Always use a mini-jumbo MTU for the FCoE L2 ring */
		if (IS_FCOE_IDX(i))
		if (IS_FCOE_IDX(i))
@@ -1518,13 +1501,15 @@ static inline void bnx2x_set_rx_buf_size(struct bnx2x *bp)
			 * IP_HEADER_ALIGNMENT_PADDING to prevent a buffer
			 * IP_HEADER_ALIGNMENT_PADDING to prevent a buffer
			 * overrun attack.
			 * overrun attack.
			 */
			 */
			fp->rx_buf_size =
			mtu = BNX2X_FCOE_MINI_JUMBO_MTU;
				BNX2X_FCOE_MINI_JUMBO_MTU + ETH_OVREHEAD +
				BNX2X_FW_RX_ALIGN + IP_HEADER_ALIGNMENT_PADDING;
		else
		else
			fp->rx_buf_size =
			mtu = bp->dev->mtu;
				bp->dev->mtu + ETH_OVREHEAD +
		fp->rx_buf_size = BNX2X_FW_RX_ALIGN_START +
				BNX2X_FW_RX_ALIGN + IP_HEADER_ALIGNMENT_PADDING;
				  IP_HEADER_ALIGNMENT_PADDING +
				  ETH_OVREHEAD +
				  mtu +
				  BNX2X_FW_RX_ALIGN_END;
		/* Note: rx_buf_size doesn't take NET_SKB_PAD into account */
	}
	}
}
}


+17 −16
Original line number Original line Diff line number Diff line
@@ -910,26 +910,27 @@ static inline int bnx2x_alloc_rx_sge(struct bnx2x *bp,
	return 0;
	return 0;
}
}


static inline int bnx2x_alloc_rx_skb(struct bnx2x *bp,
static inline int bnx2x_alloc_rx_data(struct bnx2x *bp,
				      struct bnx2x_fastpath *fp, u16 index)
				      struct bnx2x_fastpath *fp, u16 index)
{
{
	struct sk_buff *skb;
	u8 *data;
	struct sw_rx_bd *rx_buf = &fp->rx_buf_ring[index];
	struct sw_rx_bd *rx_buf = &fp->rx_buf_ring[index];
	struct eth_rx_bd *rx_bd = &fp->rx_desc_ring[index];
	struct eth_rx_bd *rx_bd = &fp->rx_desc_ring[index];
	dma_addr_t mapping;
	dma_addr_t mapping;


	skb = netdev_alloc_skb(bp->dev, fp->rx_buf_size);
	data = kmalloc(fp->rx_buf_size + NET_SKB_PAD, GFP_ATOMIC);
	if (unlikely(skb == NULL))
	if (unlikely(data == NULL))
		return -ENOMEM;
		return -ENOMEM;


	mapping = dma_map_single(&bp->pdev->dev, skb->data, fp->rx_buf_size,
	mapping = dma_map_single(&bp->pdev->dev, data + NET_SKB_PAD,
				 fp->rx_buf_size,
				 DMA_FROM_DEVICE);
				 DMA_FROM_DEVICE);
	if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
	if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
		dev_kfree_skb_any(skb);
		kfree(data);
		return -ENOMEM;
		return -ENOMEM;
	}
	}


	rx_buf->skb = skb;
	rx_buf->data = data;
	dma_unmap_addr_set(rx_buf, mapping, mapping);
	dma_unmap_addr_set(rx_buf, mapping, mapping);


	rx_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
	rx_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
@@ -938,12 +939,12 @@ static inline int bnx2x_alloc_rx_skb(struct bnx2x *bp,
	return 0;
	return 0;
}
}


/* note that we are not allocating a new skb,
/* note that we are not allocating a new buffer,
 * we are just moving one from cons to prod
 * we are just moving one from cons to prod
 * we are not creating a new mapping,
 * we are not creating a new mapping,
 * so there is no need to check for dma_mapping_error().
 * so there is no need to check for dma_mapping_error().
 */
 */
static inline void bnx2x_reuse_rx_skb(struct bnx2x_fastpath *fp,
static inline void bnx2x_reuse_rx_data(struct bnx2x_fastpath *fp,
				      u16 cons, u16 prod)
				      u16 cons, u16 prod)
{
{
	struct sw_rx_bd *cons_rx_buf = &fp->rx_buf_ring[cons];
	struct sw_rx_bd *cons_rx_buf = &fp->rx_buf_ring[cons];
@@ -953,7 +954,7 @@ static inline void bnx2x_reuse_rx_skb(struct bnx2x_fastpath *fp,


	dma_unmap_addr_set(prod_rx_buf, mapping,
	dma_unmap_addr_set(prod_rx_buf, mapping,
			   dma_unmap_addr(cons_rx_buf, mapping));
			   dma_unmap_addr(cons_rx_buf, mapping));
	prod_rx_buf->skb = cons_rx_buf->skb;
	prod_rx_buf->data = cons_rx_buf->data;
	*prod_bd = *cons_bd;
	*prod_bd = *cons_bd;
}
}


@@ -1029,9 +1030,9 @@ static inline void bnx2x_free_tpa_pool(struct bnx2x *bp,
	for (i = 0; i < last; i++) {
	for (i = 0; i < last; i++) {
		struct bnx2x_agg_info *tpa_info = &fp->tpa_info[i];
		struct bnx2x_agg_info *tpa_info = &fp->tpa_info[i];
		struct sw_rx_bd *first_buf = &tpa_info->first_buf;
		struct sw_rx_bd *first_buf = &tpa_info->first_buf;
		struct sk_buff *skb = first_buf->skb;
		u8 *data = first_buf->data;


		if (skb == NULL) {
		if (data == NULL) {
			DP(NETIF_MSG_IFDOWN, "tpa bin %d empty on free\n", i);
			DP(NETIF_MSG_IFDOWN, "tpa bin %d empty on free\n", i);
			continue;
			continue;
		}
		}
@@ -1039,8 +1040,8 @@ static inline void bnx2x_free_tpa_pool(struct bnx2x *bp,
			dma_unmap_single(&bp->pdev->dev,
			dma_unmap_single(&bp->pdev->dev,
					 dma_unmap_addr(first_buf, mapping),
					 dma_unmap_addr(first_buf, mapping),
					 fp->rx_buf_size, DMA_FROM_DEVICE);
					 fp->rx_buf_size, DMA_FROM_DEVICE);
		dev_kfree_skb(skb);
		kfree(data);
		first_buf->skb = NULL;
		first_buf->data = NULL;
	}
	}
}
}


@@ -1148,7 +1149,7 @@ static inline int bnx2x_alloc_rx_bds(struct bnx2x_fastpath *fp,
	 * fp->eth_q_stats.rx_skb_alloc_failed = 0
	 * fp->eth_q_stats.rx_skb_alloc_failed = 0
	 */
	 */
	for (i = 0; i < rx_ring_size; i++) {
	for (i = 0; i < rx_ring_size; i++) {
		if (bnx2x_alloc_rx_skb(bp, fp, ring_prod) < 0) {
		if (bnx2x_alloc_rx_data(bp, fp, ring_prod) < 0) {
			fp->eth_q_stats.rx_skb_alloc_failed++;
			fp->eth_q_stats.rx_skb_alloc_failed++;
			continue;
			continue;
		}
		}
+3 −3
Original line number Original line Diff line number Diff line
@@ -1740,6 +1740,7 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode)
	struct sw_rx_bd *rx_buf;
	struct sw_rx_bd *rx_buf;
	u16 len;
	u16 len;
	int rc = -ENODEV;
	int rc = -ENODEV;
	u8 *data;


	/* check the loopback mode */
	/* check the loopback mode */
	switch (loopback_mode) {
	switch (loopback_mode) {
@@ -1865,10 +1866,9 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode)
	dma_sync_single_for_cpu(&bp->pdev->dev,
	dma_sync_single_for_cpu(&bp->pdev->dev,
				   dma_unmap_addr(rx_buf, mapping),
				   dma_unmap_addr(rx_buf, mapping),
				   fp_rx->rx_buf_size, DMA_FROM_DEVICE);
				   fp_rx->rx_buf_size, DMA_FROM_DEVICE);
	skb = rx_buf->skb;
	data = rx_buf->data + NET_SKB_PAD + cqe->fast_path_cqe.placement_offset;
	skb_reserve(skb, cqe->fast_path_cqe.placement_offset);
	for (i = ETH_HLEN; i < pkt_size; i++)
	for (i = ETH_HLEN; i < pkt_size; i++)
		if (*(skb->data + i) != (unsigned char) (i & 0xff))
		if (*(data + i) != (unsigned char) (i & 0xff))
			goto test_loopback_rx_exit;
			goto test_loopback_rx_exit;


	rc = 0;
	rc = 0;
+2 −2
Original line number Original line Diff line number Diff line
@@ -2789,8 +2789,8 @@ static void bnx2x_pf_rx_q_prep(struct bnx2x *bp,
	/* This should be a maximum number of data bytes that may be
	/* This should be a maximum number of data bytes that may be
	 * placed on the BD (not including paddings).
	 * placed on the BD (not including paddings).
	 */
	 */
	rxq_init->buf_sz = fp->rx_buf_size - BNX2X_FW_RX_ALIGN -
	rxq_init->buf_sz = fp->rx_buf_size - BNX2X_FW_RX_ALIGN_START -
		IP_HEADER_ALIGNMENT_PADDING;
		BNX2X_FW_RX_ALIGN_END -	IP_HEADER_ALIGNMENT_PADDING;


	rxq_init->cl_qzone_id = fp->cl_qzone_id;
	rxq_init->cl_qzone_id = fp->cl_qzone_id;
	rxq_init->tpa_agg_sz = tpa_agg_size;
	rxq_init->tpa_agg_sz = tpa_agg_size;