Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit ca9ec088 authored by Alexander Duyck's avatar Alexander Duyck Committed by Jeff Kirsher
Browse files

i40e/i40evf: Add support for padding start of frames



This patch adds padding to the start of frames to make room for headroom
for us to eventually start using build_skb.  Right now we guarantee at
least NET_SKB_PAD + NET_IP_ALIGN, however we allocate more space if more is
available.  For example on x86 the headroom should be 192 bytes.

On systems that have too large of a cache line size to support storing 1.5K
padding and shared info we default to using 3K buffers and reserve
everything that isn't used for skb_shared_info or the data buffer for
headroom.

Change-ID: I33c641c9a1ea10cf7cc484c2d20985368d2d709a
Signed-off-by: default avatarAlexander Duyck <alexander.h.duyck@intel.com>
Tested-by: default avatarAndrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: default avatarJeff Kirsher <jeffrey.t.kirsher@intel.com>
parent 98efd694
Loading
Loading
Loading
Loading
+8 −1
Original line number Original line Diff line number Diff line
@@ -3038,6 +3038,12 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
		return -ENOMEM;
		return -ENOMEM;
	}
	}


	/* configure Rx buffer alignment */
	if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX))
		clear_ring_build_skb_enabled(ring);
	else
		set_ring_build_skb_enabled(ring);

	/* cache tail for quicker writes, and clear the reg before use */
	/* cache tail for quicker writes, and clear the reg before use */
	ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q);
	ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q);
	writel(0, ring->tail);
	writel(0, ring->tail);
@@ -3079,7 +3085,8 @@ static int i40e_vsi_configure_rx(struct i40e_vsi *vsi)
		vsi->max_frame = I40E_MAX_RXBUFFER;
		vsi->max_frame = I40E_MAX_RXBUFFER;
		vsi->rx_buf_len = I40E_RXBUFFER_2048;
		vsi->rx_buf_len = I40E_RXBUFFER_2048;
#if (PAGE_SIZE < 8192)
#if (PAGE_SIZE < 8192)
	} else if (vsi->netdev->mtu <= ETH_DATA_LEN) {
	} else if (!I40E_2K_TOO_SMALL_WITH_PADDING &&
		   (vsi->netdev->mtu <= ETH_DATA_LEN)) {
		vsi->max_frame = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
		vsi->max_frame = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
		vsi->rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
		vsi->rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
#endif
#endif
+13 −2
Original line number Original line Diff line number Diff line
@@ -1247,6 +1247,17 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
	writel(val, rx_ring->tail);
	writel(val, rx_ring->tail);
}
}


/**
 * i40e_rx_offset - Return expected offset into page to access data
 * @rx_ring: Ring we are requesting offset of
 *
 * Returns the offset value for ring into the data buffer.
 */
static inline unsigned int i40e_rx_offset(struct i40e_ring *rx_ring)
{
	return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0;
}

/**
/**
 * i40e_alloc_mapped_page - recycle or make a new page
 * i40e_alloc_mapped_page - recycle or make a new page
 * @rx_ring: ring to use
 * @rx_ring: ring to use
@@ -1291,7 +1302,7 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,


	bi->dma = dma;
	bi->dma = dma;
	bi->page = page;
	bi->page = page;
	bi->page_offset = 0;
	bi->page_offset = i40e_rx_offset(rx_ring);


	/* initialize pagecnt_bias to 1 representing we fully own page */
	/* initialize pagecnt_bias to 1 representing we fully own page */
	bi->pagecnt_bias = 1;
	bi->pagecnt_bias = 1;
@@ -1696,7 +1707,7 @@ static void i40e_add_rx_frag(struct i40e_ring *rx_ring,
#if (PAGE_SIZE < 8192)
#if (PAGE_SIZE < 8192)
	unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
	unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
#else
#else
	unsigned int truesize = SKB_DATA_ALIGN(size);
	unsigned int truesize = SKB_DATA_ALIGN(size + i40e_rx_offset(rx_ring));
#endif
#endif


	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+69 −1
Original line number Original line Diff line number Diff line
@@ -135,6 +135,58 @@ enum i40e_dyn_idx_t {
#define I40E_RX_DMA_ATTR \
#define I40E_RX_DMA_ATTR \
	(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
	(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)


/* Attempt to maximize the headroom available for incoming frames.  We
 * use a 2K buffer for receives and need 1536/1534 to store the data for
 * the frame.  This leaves us with 512 bytes of room.  From that we need
 * to deduct the space needed for the shared info and the padding needed
 * to IP align the frame.
 *
 * Note: For cache line sizes 256 or larger this value is going to end
 *	 up negative.  In these cases we should fall back to the legacy
 *	 receive path.
 */
#if (PAGE_SIZE < 8192)
#define I40E_2K_TOO_SMALL_WITH_PADDING \
((NET_SKB_PAD + I40E_RXBUFFER_1536) > SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048))

static inline int i40e_compute_pad(int rx_buf_len)
{
	int page_size, pad_size;

	page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
	pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len;

	return pad_size;
}

static inline int i40e_skb_pad(void)
{
	int rx_buf_len;

	/* If a 2K buffer cannot handle a standard Ethernet frame then
	 * optimize padding for a 3K buffer instead of a 1.5K buffer.
	 *
	 * For a 3K buffer we need to add enough padding to allow for
	 * tailroom due to NET_IP_ALIGN possibly shifting us out of
	 * cache-line alignment.
	 */
	if (I40E_2K_TOO_SMALL_WITH_PADDING)
		rx_buf_len = I40E_RXBUFFER_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN);
	else
		rx_buf_len = I40E_RXBUFFER_1536;

	/* if needed make room for NET_IP_ALIGN */
	rx_buf_len -= NET_IP_ALIGN;

	return i40e_compute_pad(rx_buf_len);
}

#define I40E_SKB_PAD i40e_skb_pad()
#else
#define I40E_2K_TOO_SMALL_WITH_PADDING false
#define I40E_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
#endif

/**
/**
 * i40e_test_staterr - tests bits in Rx descriptor status and error fields
 * i40e_test_staterr - tests bits in Rx descriptor status and error fields
 * @rx_desc: pointer to receive descriptor (in le64 format)
 * @rx_desc: pointer to receive descriptor (in le64 format)
@@ -342,6 +394,7 @@ struct i40e_ring {


	u16 flags;
	u16 flags;
#define I40E_TXR_FLAGS_WB_ON_ITR		BIT(0)
#define I40E_TXR_FLAGS_WB_ON_ITR		BIT(0)
#define I40E_RXR_FLAGS_BUILD_SKB_ENABLED	BIT(1)


	/* stats structs */
	/* stats structs */
	struct i40e_queue_stats	stats;
	struct i40e_queue_stats	stats;
@@ -369,6 +422,21 @@ struct i40e_ring {
					 */
					 */
} ____cacheline_internodealigned_in_smp;
} ____cacheline_internodealigned_in_smp;


static inline bool ring_uses_build_skb(struct i40e_ring *ring)
{
	return !!(ring->flags & I40E_RXR_FLAGS_BUILD_SKB_ENABLED);
}

static inline void set_ring_build_skb_enabled(struct i40e_ring *ring)
{
	ring->flags |= I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
}

static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring)
{
	ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
}

enum i40e_latency_range {
enum i40e_latency_range {
	I40E_LOWEST_LATENCY = 0,
	I40E_LOWEST_LATENCY = 0,
	I40E_LOW_LATENCY = 1,
	I40E_LOW_LATENCY = 1,
+13 −2
Original line number Original line Diff line number Diff line
@@ -618,6 +618,17 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
	writel(val, rx_ring->tail);
	writel(val, rx_ring->tail);
}
}


/**
 * i40e_rx_offset - Return expected offset into page to access data
 * @rx_ring: Ring we are requesting offset of
 *
 * Returns the offset value for ring into the data buffer.
 */
static inline unsigned int i40e_rx_offset(struct i40e_ring *rx_ring)
{
	return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0;
}

/**
/**
 * i40e_alloc_mapped_page - recycle or make a new page
 * i40e_alloc_mapped_page - recycle or make a new page
 * @rx_ring: ring to use
 * @rx_ring: ring to use
@@ -662,7 +673,7 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,


	bi->dma = dma;
	bi->dma = dma;
	bi->page = page;
	bi->page = page;
	bi->page_offset = 0;
	bi->page_offset = i40e_rx_offset(rx_ring);


	/* initialize pagecnt_bias to 1 representing we fully own page */
	/* initialize pagecnt_bias to 1 representing we fully own page */
	bi->pagecnt_bias = 1;
	bi->pagecnt_bias = 1;
@@ -1057,7 +1068,7 @@ static void i40e_add_rx_frag(struct i40e_ring *rx_ring,
#if (PAGE_SIZE < 8192)
#if (PAGE_SIZE < 8192)
	unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
	unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
#else
#else
	unsigned int truesize = SKB_DATA_ALIGN(size);
	unsigned int truesize = SKB_DATA_ALIGN(size + i40e_rx_offset(rx_ring));
#endif
#endif


	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+69 −1
Original line number Original line Diff line number Diff line
@@ -122,6 +122,58 @@ enum i40e_dyn_idx_t {
#define I40E_RX_DMA_ATTR \
#define I40E_RX_DMA_ATTR \
	(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
	(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)


/* Attempt to maximize the headroom available for incoming frames.  We
 * use a 2K buffer for receives and need 1536/1534 to store the data for
 * the frame.  This leaves us with 512 bytes of room.  From that we need
 * to deduct the space needed for the shared info and the padding needed
 * to IP align the frame.
 *
 * Note: For cache line sizes 256 or larger this value is going to end
 *	 up negative.  In these cases we should fall back to the legacy
 *	 receive path.
 */
#if (PAGE_SIZE < 8192)
#define I40E_2K_TOO_SMALL_WITH_PADDING \
((NET_SKB_PAD + I40E_RXBUFFER_1536) > SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048))

static inline int i40e_compute_pad(int rx_buf_len)
{
	int page_size, pad_size;

	page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
	pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len;

	return pad_size;
}

static inline int i40e_skb_pad(void)
{
	int rx_buf_len;

	/* If a 2K buffer cannot handle a standard Ethernet frame then
	 * optimize padding for a 3K buffer instead of a 1.5K buffer.
	 *
	 * For a 3K buffer we need to add enough padding to allow for
	 * tailroom due to NET_IP_ALIGN possibly shifting us out of
	 * cache-line alignment.
	 */
	if (I40E_2K_TOO_SMALL_WITH_PADDING)
		rx_buf_len = I40E_RXBUFFER_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN);
	else
		rx_buf_len = I40E_RXBUFFER_1536;

	/* if needed make room for NET_IP_ALIGN */
	rx_buf_len -= NET_IP_ALIGN;

	return i40e_compute_pad(rx_buf_len);
}

#define I40E_SKB_PAD i40e_skb_pad()
#else
#define I40E_2K_TOO_SMALL_WITH_PADDING false
#define I40E_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
#endif

/**
/**
 * i40e_test_staterr - tests bits in Rx descriptor status and error fields
 * i40e_test_staterr - tests bits in Rx descriptor status and error fields
 * @rx_desc: pointer to receive descriptor (in le64 format)
 * @rx_desc: pointer to receive descriptor (in le64 format)
@@ -329,6 +381,7 @@ struct i40e_ring {


	u16 flags;
	u16 flags;
#define I40E_TXR_FLAGS_WB_ON_ITR		BIT(0)
#define I40E_TXR_FLAGS_WB_ON_ITR		BIT(0)
#define I40E_RXR_FLAGS_BUILD_SKB_ENABLED	BIT(1)


	/* stats structs */
	/* stats structs */
	struct i40e_queue_stats	stats;
	struct i40e_queue_stats	stats;
@@ -356,6 +409,21 @@ struct i40e_ring {
					 */
					 */
} ____cacheline_internodealigned_in_smp;
} ____cacheline_internodealigned_in_smp;


static inline bool ring_uses_build_skb(struct i40e_ring *ring)
{
	return !!(ring->flags & I40E_RXR_FLAGS_BUILD_SKB_ENABLED);
}

static inline void set_ring_build_skb_enabled(struct i40e_ring *ring)
{
	ring->flags |= I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
}

static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring)
{
	ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
}

enum i40e_latency_range {
enum i40e_latency_range {
	I40E_LOWEST_LATENCY = 0,
	I40E_LOWEST_LATENCY = 0,
	I40E_LOW_LATENCY = 1,
	I40E_LOW_LATENCY = 1,
Loading