Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1943d8ba authored by Jesse Brandeburg's avatar Jesse Brandeburg Committed by Jeff Kirsher
Browse files

i40e/i40evf: enable hardware feature head write back



The hardware supports a feature to avoid updating the descriptor
ring by marking each descriptor with a DD bit, and instead
writes a memory location with an update to where the driver
should clean up to.  Enable this feature.

Change-ID: I5da4e0681f0b581a6401c950a81808792267fe57
Signed-off-by: default avatarJesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: default avatarMitch Williams <mitch.a.williams@intel.com>
Signed-off-by: default avatarCatherine Sullivan <catherine.sullivan@intel.com>
Tested-by: default avatarKavindya Deegala <kavindya.s.deegala@intel.com>
Signed-off-by: default avatarJeff Kirsher <jeffrey.t.kirsher@intel.com>
parent 6c167f58
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -2181,6 +2181,11 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring)
	tx_ctx.fd_ena = !!(vsi->back->flags & (I40E_FLAG_FD_SB_ENABLED |
					       I40E_FLAG_FD_ATR_ENABLED));
	tx_ctx.timesync_ena = !!(vsi->back->flags & I40E_FLAG_PTP);
	/* FDIR VSI tx ring can still use RS bit and writebacks */
	if (vsi->type != I40E_VSI_FDIR)
		tx_ctx.head_wb_ena = 1;
	tx_ctx.head_wb_addr = ring->dma +
			      (ring->count * sizeof(struct i40e_tx_desc));

	/* As part of VSI creation/update, FW allocates certain
	 * Tx arbitration queue sets for each TC enabled for
+40 −6
Original line number Diff line number Diff line
@@ -618,6 +618,20 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
	return ret;
}

/**
 * i40e_get_head - Retrieve head from head writeback
 * @tx_ring:  tx ring to fetch head of
 *
 * Returns value of Tx ring head based on value stored
 * in head write-back location
 **/
static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
{
	void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;

	return le32_to_cpu(*(volatile __le32 *)head);
}

/**
 * i40e_clean_tx_irq - Reclaim resources after transmit completes
 * @tx_ring:  tx ring to clean
@@ -629,6 +643,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
{
	u16 i = tx_ring->next_to_clean;
	struct i40e_tx_buffer *tx_buf;
	struct i40e_tx_desc *tx_head;
	struct i40e_tx_desc *tx_desc;
	unsigned int total_packets = 0;
	unsigned int total_bytes = 0;
@@ -637,6 +652,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
	tx_desc = I40E_TX_DESC(tx_ring, i);
	i -= tx_ring->count;

	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));

	do {
		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;

@@ -647,9 +664,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
		/* prevent any other reads prior to eop_desc */
		read_barrier_depends();

		/* if the descriptor isn't done, no work yet to do */
		if (!(eop_desc->cmd_type_offset_bsz &
		      cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE)))
		/* we have caught up to head, no work left to do */
		if (tx_head == tx_desc)
			break;

		/* clear next_to_watch to prevent false hangs */
@@ -905,6 +921,10 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)

	/* round up to nearest 4K */
	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
	/* add u32 for head writeback, align after this takes care of
	 * guaranteeing this is at least one cache line in size
	 */
	tx_ring->size += sizeof(u32);
	tx_ring->size = ALIGN(tx_ring->size, 4096);
	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
					   &tx_ring->dma, GFP_KERNEL);
@@ -2042,9 +2062,23 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
		tx_bi = &tx_ring->tx_bi[i];
	}

	/* Place RS bit on last descriptor of any packet that spans across the
	 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
	 */
#define WB_STRIDE 0x3
	if (((i & WB_STRIDE) != WB_STRIDE) &&
	    (first <= &tx_ring->tx_bi[i]) &&
	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
		tx_desc->cmd_type_offset_bsz =
			build_ctob(td_cmd, td_offset, size, td_tag) |
		cpu_to_le64((u64)I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT);
			cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
					 I40E_TXD_QW1_CMD_SHIFT);
	} else {
		tx_desc->cmd_type_offset_bsz =
			build_ctob(td_cmd, td_offset, size, td_tag) |
			cpu_to_le64((u64)I40E_TXD_CMD <<
					 I40E_TXD_QW1_CMD_SHIFT);
	}

	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
						 tx_ring->queue_index),
+3 −0
Original line number Diff line number Diff line
@@ -230,6 +230,9 @@ static int i40e_config_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_idx,
	tx_ctx.qlen = info->ring_len;
	tx_ctx.rdylist = le16_to_cpu(pf->vsi[vsi_idx]->info.qs_handle[0]);
	tx_ctx.rdylist_act = 0;
	tx_ctx.head_wb_ena = 1;
	tx_ctx.head_wb_addr = info->dma_ring_addr +
			      (info->ring_len * sizeof(struct i40e_tx_desc));

	/* clear the context in the HMC */
	ret = i40e_clear_lan_tx_queue_context(hw, pf_queue_id);
+40 −6
Original line number Diff line number Diff line
@@ -169,6 +169,20 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
	return ret;
}

/**
 * i40e_get_head - Retrieve head from head writeback
 * @tx_ring:  tx ring to fetch head of
 *
 * Returns value of Tx ring head based on value stored
 * in head write-back location
 **/
static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
{
	void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;

	return le32_to_cpu(*(volatile __le32 *)head);
}

/**
 * i40e_clean_tx_irq - Reclaim resources after transmit completes
 * @tx_ring:  tx ring to clean
@@ -180,6 +194,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
{
	u16 i = tx_ring->next_to_clean;
	struct i40e_tx_buffer *tx_buf;
	struct i40e_tx_desc *tx_head;
	struct i40e_tx_desc *tx_desc;
	unsigned int total_packets = 0;
	unsigned int total_bytes = 0;
@@ -188,6 +203,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
	tx_desc = I40E_TX_DESC(tx_ring, i);
	i -= tx_ring->count;

	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));

	do {
		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;

@@ -198,9 +215,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
		/* prevent any other reads prior to eop_desc */
		read_barrier_depends();

		/* if the descriptor isn't done, no work yet to do */
		if (!(eop_desc->cmd_type_offset_bsz &
		      cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE)))
		/* we have caught up to head, no work left to do */
		if (tx_head == tx_desc)
			break;

		/* clear next_to_watch to prevent false hangs */
@@ -432,6 +448,10 @@ int i40evf_setup_tx_descriptors(struct i40e_ring *tx_ring)

	/* round up to nearest 4K */
	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
	/* add u32 for head writeback, align after this takes care of
	 * guaranteeing this is at least one cache line in size
	 */
	tx_ring->size += sizeof(u32);
	tx_ring->size = ALIGN(tx_ring->size, 4096);
	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
					   &tx_ring->dma, GFP_KERNEL);
@@ -1377,9 +1397,23 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
		tx_bi = &tx_ring->tx_bi[i];
	}

	/* Place RS bit on last descriptor of any packet that spans across the
	 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
	 */
#define WB_STRIDE 0x3
	if (((i & WB_STRIDE) != WB_STRIDE) &&
	    (first <= &tx_ring->tx_bi[i]) &&
	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
		tx_desc->cmd_type_offset_bsz =
			build_ctob(td_cmd, td_offset, size, td_tag) |
		cpu_to_le64((u64)I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT);
			cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
					 I40E_TXD_QW1_CMD_SHIFT);
	} else {
		tx_desc->cmd_type_offset_bsz =
			build_ctob(td_cmd, td_offset, size, td_tag) |
			cpu_to_le64((u64)I40E_TXD_CMD <<
					 I40E_TXD_QW1_CMD_SHIFT);
	}

	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
						 tx_ring->queue_index),