
Commit 3eb1a40f authored by Alexander Duyck, committed by Jeff Kirsher

igbvf: Make next_to_watch a pointer and adjust memory barriers to avoid races



This change addresses several races that become possible because
next_to_watch could be set to a value indicating that a descriptor is done
when it is not.  To correct this, next_to_watch is instead made a pointer
that is set to NULL during cleanup and set to the eop_desc after the
descriptor rings have been written.
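
Concretely, the buffer-info field changes from a ring index into a
descriptor pointer, which gives the cleanup path an unambiguous "nothing
pending" state. A minimal sketch of the before/after layouts (the _old/_new
names are illustrative only; the real struct is igbvf_buffer in igbvf.h,
shown in the diff below):

/* before: a ring index; cleanup can only validate it by reading the
 * descriptor it names, and a stale index may land on a recycled slot
 * whose DD bit is still set from an earlier transmit */
struct igbvf_buffer_old {
	unsigned long time_stamp;
	u16 length;
	u16 next_to_watch;
	u16 mapped_as_page;
};

/* after: a pointer, with NULL reserved to mean "no work pending" */
struct igbvf_buffer_new {
	unsigned long time_stamp;
	union e1000_adv_tx_desc *next_to_watch;
	u16 length;
	u16 mapped_as_page;
};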

To enforce proper ordering, the next_to_watch pointer is not set until
after a wmb() that writes the values to the last descriptor in a transmit.
To guarantee that the descriptor contents are not read before the eop_desc
pointer itself, read_barrier_depends() is used; it is only really necessary
on the Alpha architecture.
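
A condensed sketch of the resulting barrier pairing (simplified helper
names, not the driver's actual functions; those appear in the diff below):

/* producer (transmit path): publish the EOP descriptor pointer only
 * after the descriptor ring writes are globally visible */
static void sketch_tx_publish(struct igbvf_buffer *first,
			      union e1000_adv_tx_desc *eop_desc)
{
	/* ... all descriptors for this transmit written above ... */
	wmb();				/* ring writes before pointer write */
	first->next_to_watch = eop_desc;
}

/* consumer (cleanup path): check the pointer, order reads through it,
 * then clear it once the hardware reports completion */
static bool sketch_tx_done(struct igbvf_buffer *buffer_info)
{
	union e1000_adv_tx_desc *eop_desc = buffer_info->next_to_watch;

	if (!eop_desc)			/* nothing published yet */
		return false;

	read_barrier_depends();		/* no reads via eop_desc before this;
					 * a no-op everywhere but Alpha */

	if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
		return false;		/* hardware has not finished */

	buffer_info->next_to_watch = NULL;	/* consume; prevents false hangs */
	return true;
}

(Later kernels removed read_barrier_depends(); the dependency ordering it
provided is now folded into READ_ONCE().)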

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Acked-by: Greg Rose <gregory.v.rose@intel.com>
Tested-by: Sibai Li <sibai.li@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
parent e792cd91
drivers/net/ethernet/intel/igbvf/igbvf.h  +1 −1
@@ -127,8 +127,8 @@ struct igbvf_buffer {
 		/* Tx */
 		struct {
 			unsigned long time_stamp;
+			union e1000_adv_tx_desc *next_to_watch;
 			u16 length;
-			u16 next_to_watch;
 			u16 mapped_as_page;
 		};
 		/* Rx */
drivers/net/ethernet/intel/igbvf/netdev.c  +30 −22
@@ -797,20 +797,31 @@ static bool igbvf_clean_tx_irq(struct igbvf_ring *tx_ring)
 	struct sk_buff *skb;
 	union e1000_adv_tx_desc *tx_desc, *eop_desc;
 	unsigned int total_bytes = 0, total_packets = 0;
-	unsigned int i, eop, count = 0;
+	unsigned int i, count = 0;
 	bool cleaned = false;
 
 	i = tx_ring->next_to_clean;
-	eop = tx_ring->buffer_info[i].next_to_watch;
-	eop_desc = IGBVF_TX_DESC_ADV(*tx_ring, eop);
+	buffer_info = &tx_ring->buffer_info[i];
+	eop_desc = buffer_info->next_to_watch;
+
+	do {
+		/* if next_to_watch is not set then there is no work pending */
+		if (!eop_desc)
+			break;
+
+		/* prevent any other reads prior to eop_desc */
+		read_barrier_depends();
+
+		/* if DD is not set pending work has not been completed */
+		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		buffer_info->next_to_watch = NULL;
 
-	while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
-	       (count < tx_ring->count)) {
-		rmb();	/* read buffer_info after eop_desc status */
 		for (cleaned = false; !cleaned; count++) {
 			tx_desc = IGBVF_TX_DESC_ADV(*tx_ring, i);
-			buffer_info = &tx_ring->buffer_info[i];
-			cleaned = (i == eop);
+			cleaned = (tx_desc == eop_desc);
 			skb = buffer_info->skb;
 
 			if (skb) {
@@ -831,10 +842,12 @@ static bool igbvf_clean_tx_irq(struct igbvf_ring *tx_ring)
 			i++;
 			if (i == tx_ring->count)
 				i = 0;
+
+			buffer_info = &tx_ring->buffer_info[i];
 		}
-		eop = tx_ring->buffer_info[i].next_to_watch;
-		eop_desc = IGBVF_TX_DESC_ADV(*tx_ring, eop);
-	}
+
+		eop_desc = buffer_info->next_to_watch;
+	} while (count < tx_ring->count);
 
 	tx_ring->next_to_clean = i;
 
@@ -1961,7 +1974,6 @@ static int igbvf_tso(struct igbvf_adapter *adapter,
 	context_desc->seqnum_seed = 0;
 
 	buffer_info->time_stamp = jiffies;
-	buffer_info->next_to_watch = i;
 	buffer_info->dma = 0;
 	i++;
 	if (i == tx_ring->count)
@@ -2021,7 +2033,6 @@ static inline bool igbvf_tx_csum(struct igbvf_adapter *adapter,
 		context_desc->mss_l4len_idx = 0;
 
 		buffer_info->time_stamp = jiffies;
-		buffer_info->next_to_watch = i;
 		buffer_info->dma = 0;
 		i++;
 		if (i == tx_ring->count)
@@ -2061,8 +2072,7 @@ static int igbvf_maybe_stop_tx(struct net_device *netdev, int size)
 
 static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
                                    struct igbvf_ring *tx_ring,
-                                   struct sk_buff *skb,
-                                   unsigned int first)
+				   struct sk_buff *skb)
 {
 	struct igbvf_buffer *buffer_info;
 	struct pci_dev *pdev = adapter->pdev;
@@ -2077,7 +2087,6 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
 	buffer_info->length = len;
 	/* set time_stamp *before* dma to help avoid a possible race */
 	buffer_info->time_stamp = jiffies;
-	buffer_info->next_to_watch = i;
 	buffer_info->mapped_as_page = false;
 	buffer_info->dma = dma_map_single(&pdev->dev, skb->data, len,
 					  DMA_TO_DEVICE);
@@ -2100,7 +2109,6 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
 		BUG_ON(len >= IGBVF_MAX_DATA_PER_TXD);
 		buffer_info->length = len;
 		buffer_info->time_stamp = jiffies;
-		buffer_info->next_to_watch = i;
 		buffer_info->mapped_as_page = true;
 		buffer_info->dma = skb_frag_dma_map(&pdev->dev, frag, 0, len,
 						DMA_TO_DEVICE);
@@ -2109,7 +2117,6 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
 	}
 
 	tx_ring->buffer_info[i].skb = skb;
-	tx_ring->buffer_info[first].next_to_watch = i;
 
 	return ++count;
 
@@ -2120,7 +2127,6 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
 	buffer_info->dma = 0;
 	buffer_info->time_stamp = 0;
 	buffer_info->length = 0;
-	buffer_info->next_to_watch = 0;
 	buffer_info->mapped_as_page = false;
 	if (count)
 		count--;
@@ -2139,7 +2145,8 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
 
 static inline void igbvf_tx_queue_adv(struct igbvf_adapter *adapter,
                                       struct igbvf_ring *tx_ring,
-                                      int tx_flags, int count, u32 paylen,
+				      int tx_flags, int count,
+				      unsigned int first, u32 paylen,
                                       u8 hdr_len)
 {
 	union e1000_adv_tx_desc *tx_desc = NULL;
@@ -2189,6 +2196,7 @@ static inline void igbvf_tx_queue_adv(struct igbvf_adapter *adapter,
 	 * such as IA-64). */
 	wmb();
 
+	tx_ring->buffer_info[first].next_to_watch = tx_desc;
 	tx_ring->next_to_use = i;
 	writel(i, adapter->hw.hw_addr + tx_ring->tail);
 	/* we need this if more than one processor can write to our tail
@@ -2255,11 +2263,11 @@ static netdev_tx_t igbvf_xmit_frame_ring_adv(struct sk_buff *skb,
 	 * count reflects descriptors mapped, if 0 then mapping error
 	 * has occurred and we need to rewind the descriptor queue
 	 */
-	count = igbvf_tx_map_adv(adapter, tx_ring, skb, first);
+	count = igbvf_tx_map_adv(adapter, tx_ring, skb);
 
 	if (count) {
 		igbvf_tx_queue_adv(adapter, tx_ring, tx_flags, count,
-		                   skb->len, hdr_len);
+				   first, skb->len, hdr_len);
 		/* Make sure there is space in the ring for the next send. */
 		igbvf_maybe_stop_tx(netdev, MAX_SKB_FRAGS + 4);
 	} else {