
Commit b01867cb authored by Ayaz Abdulla, committed by Jeff Garzik

forcedeth: rx data path optimization



This patch optimizes the rx data paths and cleans up the code: the receive loops fold the ring-empty, descriptor-ownership and budget checks into one while condition, the valid-descriptor case becomes the branch-hinted fast path with soft errors separated from hard ones, the checksum test checks the common ip+tcp encoding first, and the optimized path reads the descriptor's VLAN word only when a VLAN group is configured.
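The central change is in the two receive loops (nv_rx_process and nv_rx_process_optimized, hunks three and eight below): a counted for loop with three separate early-exit checks becomes a single short-circuiting while condition that tests ring-emptiness, descriptor ownership and the processing budget in one expression. Below is a minimal userspace sketch of that pattern; all names here are hypothetical, not taken from forcedeth.

#include <stdio.h>

#define RING_SIZE   8
#define OWNED_BY_HW 0x80000000u  /* hypothetical "descriptor still owned by NIC" bit */

struct desc {
	unsigned int flaglen;    /* stands in for the hardware descriptor word */
};

static struct desc ring[RING_SIZE];
static struct desc *get_rx = ring;      /* next descriptor to reap */
static struct desc *put_rx = &ring[5];  /* next descriptor the NIC will fill */

static int process_rx(int limit)
{
	unsigned int flags;
	int processed = 0;

	/* One combined test, as in the patch: ring not empty, descriptor
	 * handed back by the hardware, budget left.  Short-circuiting
	 * matters: the counter only increments after the first two tests
	 * pass, so it counts real packets.  (When the budget test is what
	 * stops the loop, the post-increment leaves the returned count one
	 * past the limit, a quirk the caller has to tolerate.) */
	while ((get_rx != put_rx) &&
	       !((flags = get_rx->flaglen) & OWNED_BY_HW) &&
	       (processed++ < limit)) {
		printf("reaped descriptor, flags 0x%x\n", flags);
		if (++get_rx == &ring[RING_SIZE])
			get_rx = ring;  /* wrap at the end of the ring */
	}
	return processed;
}

int main(void)
{
	printf("processed %d packets\n", process_rx(RING_SIZE));
	return 0;
}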

Signed-off-by: Ayaz Abdulla <aabdulla@nvidia.com>

Signed-off-by: Jeff Garzik <jeff@garzik.org>
parent 445583b8
148 additions, 182 deletions
@@ -1317,9 +1317,9 @@ static int nv_alloc_rx(struct net_device *dev)
 			np->put_rx.orig->buf = cpu_to_le32(np->put_rx_ctx->dma);
 			wmb();
 			np->put_rx.orig->flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX_AVAIL);
-			if (np->put_rx.orig++ == np->last_rx.orig)
+			if (unlikely(np->put_rx.orig++ == np->last_rx.orig))
 				np->put_rx.orig = np->first_rx.orig;
-			if (np->put_rx_ctx++ == np->last_rx_ctx)
+			if (unlikely(np->put_rx_ctx++ == np->last_rx_ctx))
 				np->put_rx_ctx = np->first_rx_ctx;
 		} else {
 			return 1;
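The only functional change in the two allocation hunks is the unlikely() annotation on the ring wrap-around tests. unlikely() is the kernel's wrapper around GCC's __builtin_expect; it tells the compiler to lay the wrap branch out of the straight-line path, which a branch taken once per full ring traversal justifies. A self-contained sketch of the idiom follows; the macros match the kernel's definitions in include/linux/compiler.h, but the refill loop itself is illustrative, not forcedeth code.

#define likely(x)    __builtin_expect(!!(x), 1)
#define unlikely(x)  __builtin_expect(!!(x), 0)

#define RING_SIZE 64

struct desc {
	unsigned int flaglen;
};

/* Illustrative refill loop: the wrap branch is taken once every
 * RING_SIZE iterations, so hinting it as unlikely keeps the hot path
 * as the fall-through in the compiler's block layout. */
static void refill_ring(struct desc *first, struct desc *last,
			struct desc **put, unsigned int n)
{
	while (n--) {
		(*put)->flaglen = 0;             /* mark entry available */
		if (unlikely((*put)++ == last))
			*put = first;            /* rare: wrap to the start */
	}
}

int main(void)
{
	static struct desc ring[RING_SIZE];
	struct desc *put = ring;

	refill_ring(ring, &ring[RING_SIZE - 1], &put, 2 * RING_SIZE);
	return put != ring;  /* exactly two full wraps ends back at the start */
}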
@@ -1349,9 +1349,9 @@ static int nv_alloc_rx_optimized(struct net_device *dev)
 			np->put_rx.ex->buflow = cpu_to_le64(np->put_rx_ctx->dma) & 0x0FFFFFFFF;
 			wmb();
 			np->put_rx.ex->flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX2_AVAIL);
-			if (np->put_rx.ex++ == np->last_rx.ex)
+			if (unlikely(np->put_rx.ex++ == np->last_rx.ex))
 				np->put_rx.ex = np->first_rx.ex;
-			if (np->put_rx_ctx++ == np->last_rx_ctx)
+			if (unlikely(np->put_rx_ctx++ == np->last_rx_ctx))
 				np->put_rx_ctx = np->first_rx_ctx;
 		} else {
 			return 1;
@@ -2046,24 +2046,17 @@ static int nv_rx_process(struct net_device *dev, int limit)
 {
 	struct fe_priv *np = netdev_priv(dev);
 	u32 flags;
-	u32 vlanflags = 0;
-	int count;
-
-	for (count = 0; count < limit; ++count) {
+	u32 rx_processed_cnt = 0;
 	struct sk_buff *skb;
 	int len;
 
-		if (np->get_rx.orig == np->put_rx.orig)
-			break;	/* we scanned the whole ring - do not continue */
-		flags = le32_to_cpu(np->get_rx.orig->flaglen);
-		len = nv_descr_getlength(np->get_rx.orig, np->desc_ver);
+	while((np->get_rx.orig != np->put_rx.orig) &&
+	      !((flags = le32_to_cpu(np->get_rx.orig->flaglen)) & NV_RX_AVAIL) &&
+		(rx_processed_cnt++ < limit)) {
 
 		dprintk(KERN_DEBUG "%s: nv_rx_process: flags 0x%x.\n",
 					dev->name, flags);
 
-		if (flags & NV_RX_AVAIL)
-			break;	/* still owned by hardware, */
-
 		/*
 		 * the packet is for us - immediately tear down the pci mapping.
 		 * TODO: check if a prefetch of the first cacheline improves
@@ -2087,35 +2080,9 @@ static int nv_rx_process(struct net_device *dev, int limit)
 		}
 		/* look at what we actually got: */
 		if (np->desc_ver == DESC_VER_1) {
-			if (!(flags & NV_RX_DESCRIPTORVALID)) {
-				dev_kfree_skb(skb);
-				goto next_pkt;
-			}
-
-			if (flags & NV_RX_ERROR) {
-				if (flags & NV_RX_MISSEDFRAME) {
-					np->stats.rx_missed_errors++;
-					np->stats.rx_errors++;
-					dev_kfree_skb(skb);
-					goto next_pkt;
-				}
-				if (flags & (NV_RX_ERROR1|NV_RX_ERROR2|NV_RX_ERROR3)) {
-					np->stats.rx_errors++;
-					dev_kfree_skb(skb);
-					goto next_pkt;
-				}
-				if (flags & NV_RX_CRCERR) {
-					np->stats.rx_crc_errors++;
-					np->stats.rx_errors++;
-					dev_kfree_skb(skb);
-					goto next_pkt;
-				}
-				if (flags & NV_RX_OVERFLOW) {
-					np->stats.rx_over_errors++;
-					np->stats.rx_errors++;
-					dev_kfree_skb(skb);
-					goto next_pkt;
-				}
-				if (flags & NV_RX_ERROR4) {
-					len = nv_getlen(dev, skb->data, len);
-					if (len < 0) {
+			if (likely(flags & NV_RX_DESCRIPTORVALID)) {
+				len = flags & LEN_MASK_V1;
+				if (unlikely(flags & NV_RX_ERROR)) {
+					if (flags & NV_RX_ERROR4) {
+						len = nv_getlen(dev, skb->data, len);
+						if (len < 0) {
@@ -2124,37 +2091,33 @@ static int nv_rx_process(struct net_device *dev, int limit)
-						goto next_pkt;
-					}
-				}
-				/* framing errors are soft errors. */
-				if (flags & NV_RX_FRAMINGERR) {
-					if (flags & NV_RX_SUBSTRACT1) {
-						len--;
-					}
-				}
-			}
-		} else {
-			if (!(flags & NV_RX2_DESCRIPTORVALID)) {
-				dev_kfree_skb(skb);
-				goto next_pkt;
-			}
-
-			if (flags & NV_RX2_ERROR) {
-				if (flags & (NV_RX2_ERROR1|NV_RX2_ERROR2|NV_RX2_ERROR3)) {
-					np->stats.rx_errors++;
-					dev_kfree_skb(skb);
-					goto next_pkt;
-				}
-				if (flags & NV_RX2_CRCERR) {
-					np->stats.rx_crc_errors++;
-					np->stats.rx_errors++;
-					dev_kfree_skb(skb);
-					goto next_pkt;
-				}
-				if (flags & NV_RX2_OVERFLOW) {
-					np->stats.rx_over_errors++;
-					np->stats.rx_errors++;
-					dev_kfree_skb(skb);
-					goto next_pkt;
-				}
-				if (flags & NV_RX2_ERROR4) {
-					len = nv_getlen(dev, skb->data, len);
-					if (len < 0) {
+							goto next_pkt;
+						}
+					}
+					/* framing errors are soft errors */
+					else if (flags & NV_RX_FRAMINGERR) {
+						if (flags & NV_RX_SUBSTRACT1) {
+							len--;
+						}
+					}
+					/* the rest are hard errors */
+					else {
+						if (flags & NV_RX_MISSEDFRAME)
+							np->stats.rx_missed_errors++;
+						if (flags & NV_RX_CRCERR)
+							np->stats.rx_crc_errors++;
+						if (flags & NV_RX_OVERFLOW)
+							np->stats.rx_over_errors++;
+						np->stats.rx_errors++;
+						dev_kfree_skb(skb);
+						goto next_pkt;
+					}
+				}
+			} else {
+				dev_kfree_skb(skb);
+				goto next_pkt;
+			}
+		} else {
+			if (likely(flags & NV_RX2_DESCRIPTORVALID)) {
+				len = flags & LEN_MASK_V2;
+				if (unlikely(flags & NV_RX2_ERROR)) {
+					if (flags & NV_RX2_ERROR4) {
+						len = nv_getlen(dev, skb->data, len);
+						if (len < 0) {
@@ -2164,23 +2127,34 @@ static int nv_rx_process(struct net_device *dev, int limit)
-						}
-					}
-				/* framing errors are soft errors */
-				if (flags & NV_RX2_FRAMINGERR) {
-					if (flags & NV_RX2_SUBSTRACT1) {
-						len--;
-					}
-				}
-			}
-			if (np->rx_csum) {
-				flags &= NV_RX2_CHECKSUMMASK;
-				if (flags == NV_RX2_CHECKSUMOK1 ||
-				    flags == NV_RX2_CHECKSUMOK2 ||
-				    flags == NV_RX2_CHECKSUMOK3) {
-					dprintk(KERN_DEBUG "%s: hw checksum hit!.\n", dev->name);
-					skb->ip_summed = CHECKSUM_UNNECESSARY;
-				} else {
-					dprintk(KERN_DEBUG "%s: hwchecksum miss!.\n", dev->name);
-				}
-			}
-		}
+							}
+						}
+						/* framing errors are soft errors */
+						else if (flags & NV_RX2_FRAMINGERR) {
+							if (flags & NV_RX2_SUBSTRACT1) {
+								len--;
+							}
+						}
+						/* the rest are hard errors */
+						else {
+							if (flags & NV_RX2_CRCERR)
+								np->stats.rx_crc_errors++;
+							if (flags & NV_RX2_OVERFLOW)
+								np->stats.rx_over_errors++;
+							np->stats.rx_errors++;
+							dev_kfree_skb(skb);
+							goto next_pkt;
+						}
+					}
+				if ((flags & NV_RX2_CHECKSUMMASK) == NV_RX2_CHECKSUMOK2)/*ip and tcp */ {
+					skb->ip_summed = CHECKSUM_UNNECESSARY;
+				} else {
+					if ((flags & NV_RX2_CHECKSUMMASK) == NV_RX2_CHECKSUMOK1 ||
+					    (flags & NV_RX2_CHECKSUMMASK) == NV_RX2_CHECKSUMOK3) {
+						skb->ip_summed = CHECKSUM_UNNECESSARY;
+					}
+				}
+			} else {
+				dev_kfree_skb(skb);
+				goto next_pkt;
+			}
+		}
 		/* got a valid packet - forward it to the network core */
 		skb_put(skb, len);
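Hunks four through six above restructure the descriptor-validity and error handling so that the common case is an annotated fast path: a valid descriptor is tested first with likely(), the whole error classification sits behind a single unlikely() test, and inside it the soft errors (length recovery, framing) are separated from the hard errors that only bump counters and drop the frame. A compact, compilable sketch of that triage shape, using hypothetical flag bits rather than the real NV_RX* values:

#define likely(x)    __builtin_expect(!!(x), 1)
#define unlikely(x)  __builtin_expect(!!(x), 0)

/* Hypothetical flag bits, for illustration only */
#define DESC_VALID   0x01
#define DESC_ERROR   0x02
#define ERR_LENGTH   0x04  /* recoverable: re-derive the length */
#define ERR_FRAMING  0x08  /* soft: trim one byte */
/* any other bit under DESC_ERROR: hard error, drop the frame */

/* Returns the frame length to deliver, or -1 to drop.  The shape
 * mirrors the patch: one likely() test for the valid fast path, one
 * unlikely() error test whose body triages soft vs hard errors. */
static int triage(unsigned int flags, int len)
{
	if (likely(flags & DESC_VALID)) {
		if (unlikely(flags & DESC_ERROR)) {
			if (flags & ERR_LENGTH) {
				if (len <= 0)
					return -1;  /* recovery failed: drop */
			} else if (flags & ERR_FRAMING) {
				len--;              /* soft error: adjust only */
			} else {
				return -1;          /* hard error: count and drop */
			}
		}
		return len;
	}
	return -1;  /* invalid descriptor */
}

int main(void)
{
	return triage(DESC_VALID, 100) != 100;
}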
@@ -2188,29 +2162,21 @@ static int nv_rx_process(struct net_device *dev, int limit)
 		dprintk(KERN_DEBUG "%s: nv_rx_process: %d bytes, proto %d accepted.\n",
 					dev->name, len, skb->protocol);
 #ifdef CONFIG_FORCEDETH_NAPI
-		if (np->vlangrp && (vlanflags & NV_RX3_VLAN_TAG_PRESENT))
-			vlan_hwaccel_receive_skb(skb, np->vlangrp,
-						 vlanflags & NV_RX3_VLAN_TAG_MASK);
-		else
-			netif_receive_skb(skb);
+		netif_receive_skb(skb);
 #else
-		if (np->vlangrp && (vlanflags & NV_RX3_VLAN_TAG_PRESENT))
-			vlan_hwaccel_rx(skb, np->vlangrp,
-					vlanflags & NV_RX3_VLAN_TAG_MASK);
-		else
-			netif_rx(skb);
+		netif_rx(skb);
 #endif
 		dev->last_rx = jiffies;
 		np->stats.rx_packets++;
 		np->stats.rx_bytes += len;
 next_pkt:
-		if (np->get_rx.orig++ == np->last_rx.orig)
+		if (unlikely(np->get_rx.orig++ == np->last_rx.orig))
 			np->get_rx.orig = np->first_rx.orig;
-		if (np->get_rx_ctx++ == np->last_rx_ctx)
+		if (unlikely(np->get_rx_ctx++ == np->last_rx_ctx))
 			np->get_rx_ctx = np->first_rx_ctx;
 	}
 
-	return count;
+	return rx_processed_cnt;
 }
 
 static int nv_rx_process_optimized(struct net_device *dev, int limit)
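Hunk six also rewrote the receive-checksum test (hunk ten below does the same for the optimized path): instead of clobbering flags with the mask and comparing against all three OK encodings every time, the new code tests the ip+tcp encoding (NV_RX2_CHECKSUMOK2, the common case) first and only falls back to the other two encodings on a miss. A standalone sketch of the same ordering, with made-up bit values standing in for the real NV_RX2_CHECKSUM* constants:

/* Hypothetical encodings of the checksum-result field; the real
 * NV_RX2_CHECKSUM* values live in forcedeth.c. */
#define CKSUM_MASK 0x30u
#define CKSUM_OK1  0x10u  /* e.g. IP header only */
#define CKSUM_OK2  0x20u  /* e.g. IP + TCP/UDP: the common case */
#define CKSUM_OK3  0x30u

static int hw_checksum_ok(unsigned int flags)
{
	/* Common case first: one mask plus one compare.  The two rarer
	 * encodings share the fallback, mirroring the patch, and flags
	 * itself is never modified. */
	if ((flags & CKSUM_MASK) == CKSUM_OK2)
		return 1;
	return (flags & CKSUM_MASK) == CKSUM_OK1 ||
	       (flags & CKSUM_MASK) == CKSUM_OK3;
}

int main(void)
{
	return !(hw_checksum_ok(CKSUM_OK2) && hw_checksum_ok(CKSUM_OK1) &&
		 !hw_checksum_ok(0));
}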
@@ -2218,24 +2184,17 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit)
 	struct fe_priv *np = netdev_priv(dev);
 	u32 flags;
 	u32 vlanflags = 0;
-	int count;
-
-	for (count = 0; count < limit; ++count) {
+	u32 rx_processed_cnt = 0;
 	struct sk_buff *skb;
 	int len;
 
-		if (np->get_rx.ex == np->put_rx.ex)
-			break;	/* we scanned the whole ring - do not continue */
-		flags = le32_to_cpu(np->get_rx.ex->flaglen);
-		len = nv_descr_getlength_ex(np->get_rx.ex, np->desc_ver);
-		vlanflags = le32_to_cpu(np->get_rx.ex->buflow);
+	while((np->get_rx.ex != np->put_rx.ex) &&
+	      !((flags = le32_to_cpu(np->get_rx.ex->flaglen)) & NV_RX2_AVAIL) &&
+	      (rx_processed_cnt++ < limit)) {
 
 		dprintk(KERN_DEBUG "%s: nv_rx_process_optimized: flags 0x%x.\n",
 					dev->name, flags);
 
-		if (flags & NV_RX_AVAIL)
-			break;	/* still owned by hardware, */
-
 		/*
 		 * the packet is for us - immediately tear down the pci mapping.
 		 * TODO: check if a prefetch of the first cacheline improves
@@ -2258,29 +2217,9 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit)
 			dprintk("\n");
 		}
 		/* look at what we actually got: */
-		if (!(flags & NV_RX2_DESCRIPTORVALID)) {
-			dev_kfree_skb(skb);
-			goto next_pkt;
-		}
-
-		if (flags & NV_RX2_ERROR) {
-			if (flags & (NV_RX2_ERROR1|NV_RX2_ERROR2|NV_RX2_ERROR3)) {
-				np->stats.rx_errors++;
-				dev_kfree_skb(skb);
-				goto next_pkt;
-			}
-			if (flags & NV_RX2_CRCERR) {
-				np->stats.rx_crc_errors++;
-				np->stats.rx_errors++;
-				dev_kfree_skb(skb);
-				goto next_pkt;
-			}
-			if (flags & NV_RX2_OVERFLOW) {
-				np->stats.rx_over_errors++;
-				np->stats.rx_errors++;
-				dev_kfree_skb(skb);
-				goto next_pkt;
-			}
-			if (flags & NV_RX2_ERROR4) {
-				len = nv_getlen(dev, skb->data, len);
-				if (len < 0) {
+		if (likely(flags & NV_RX2_DESCRIPTORVALID)) {
+			len = flags & LEN_MASK_V2;
+			if (unlikely(flags & NV_RX2_ERROR)) {
+				if (flags & NV_RX2_ERROR4) {
+					len = nv_getlen(dev, skb->data, len);
+					if (len < 0) {
@@ -2290,52 +2229,79 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit)
-				}
-			}
-			/* framing errors are soft errors */
-			if (flags & NV_RX2_FRAMINGERR) {
-				if (flags & NV_RX2_SUBSTRACT1) {
-					len--;
-				}
-			}
-		}
-		if (np->rx_csum) {
-			flags &= NV_RX2_CHECKSUMMASK;
-			if (flags == NV_RX2_CHECKSUMOK1 ||
-			    flags == NV_RX2_CHECKSUMOK2 ||
-			    flags == NV_RX2_CHECKSUMOK3) {
-				dprintk(KERN_DEBUG "%s: hw checksum hit!.\n", dev->name);
-				skb->ip_summed = CHECKSUM_UNNECESSARY;
-			} else {
-				dprintk(KERN_DEBUG "%s: hwchecksum miss!.\n", dev->name);
-			}
-		}
-		/* got a valid packet - forward it to the network core */
-		skb_put(skb, len);
-		skb->protocol = eth_type_trans(skb, dev);
-		dprintk(KERN_DEBUG "%s: nv_rx_process: %d bytes, proto %d accepted.\n",
-					dev->name, len, skb->protocol);
-#ifdef CONFIG_FORCEDETH_NAPI
-		if (np->vlangrp && (vlanflags & NV_RX3_VLAN_TAG_PRESENT))
-			vlan_hwaccel_receive_skb(skb, np->vlangrp,
-						 vlanflags & NV_RX3_VLAN_TAG_MASK);
-		else
-			netif_receive_skb(skb);
-#else
-		if (np->vlangrp && (vlanflags & NV_RX3_VLAN_TAG_PRESENT))
-			vlan_hwaccel_rx(skb, np->vlangrp,
-					vlanflags & NV_RX3_VLAN_TAG_MASK);
-		else
-			netif_rx(skb);
-#endif
-		dev->last_rx = jiffies;
-		np->stats.rx_packets++;
-		np->stats.rx_bytes += len;
+					}
+				}
+				/* framing errors are soft errors */
+				else if (flags & NV_RX2_FRAMINGERR) {
+					if (flags & NV_RX2_SUBSTRACT1) {
+						len--;
+					}
+				}
+				/* the rest are hard errors */
+				else {
+					if (flags & NV_RX2_CRCERR)
+						np->stats.rx_crc_errors++;
+					if (flags & NV_RX2_OVERFLOW)
+						np->stats.rx_over_errors++;
+					np->stats.rx_errors++;
+					dev_kfree_skb(skb);
+					goto next_pkt;
+				}
+			}
+
+			if ((flags & NV_RX2_CHECKSUMMASK) == NV_RX2_CHECKSUMOK2)/*ip and tcp */ {
+				skb->ip_summed = CHECKSUM_UNNECESSARY;
+			} else {
+				if ((flags & NV_RX2_CHECKSUMMASK) == NV_RX2_CHECKSUMOK1 ||
+				    (flags & NV_RX2_CHECKSUMMASK) == NV_RX2_CHECKSUMOK3) {
+					skb->ip_summed = CHECKSUM_UNNECESSARY;
+				}
+			}
+
+			/* got a valid packet - forward it to the network core */
+			skb_put(skb, len);
+			skb->protocol = eth_type_trans(skb, dev);
+			prefetch(skb->data);
+
+			dprintk(KERN_DEBUG "%s: nv_rx_process_optimized: %d bytes, proto %d accepted.\n",
+				dev->name, len, skb->protocol);
+
+			if (likely(!np->vlangrp)) {
+#ifdef CONFIG_FORCEDETH_NAPI
+				netif_receive_skb(skb);
+#else
+				netif_rx(skb);
+#endif
+			} else {
+				vlanflags = le32_to_cpu(np->get_rx.ex->buflow);
+				if (vlanflags & NV_RX3_VLAN_TAG_PRESENT) {
+#ifdef CONFIG_FORCEDETH_NAPI
+					vlan_hwaccel_receive_skb(skb, np->vlangrp,
+								 vlanflags & NV_RX3_VLAN_TAG_MASK);
+#else
+					vlan_hwaccel_rx(skb, np->vlangrp,
+							vlanflags & NV_RX3_VLAN_TAG_MASK);
+#endif
+				} else {
+#ifdef CONFIG_FORCEDETH_NAPI
+					netif_receive_skb(skb);
+#else
+					netif_rx(skb);
+#endif
+				}
+			}
+
+			dev->last_rx = jiffies;
+			np->stats.rx_packets++;
+			np->stats.rx_bytes += len;
+		} else {
+			dev_kfree_skb(skb);
+		}
 next_pkt:
-		if (np->get_rx.ex++ == np->last_rx.ex)
+		if (unlikely(np->get_rx.ex++ == np->last_rx.ex))
 			np->get_rx.ex = np->first_rx.ex;
-		if (np->get_rx_ctx++ == np->last_rx_ctx)
+		if (unlikely(np->get_rx_ctx++ == np->last_rx_ctx))
 			np->get_rx_ctx = np->first_rx_ctx;
 	}
 
-	return count;
+	return rx_processed_cnt;
 }
 
 static void set_bufsize(struct net_device *dev)
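The final hunk also changes when the VLAN word is read in the optimized path: the old loop loaded vlanflags from the descriptor's buflow word for every packet, while the new code reads it only when a VLAN group is actually registered (np->vlangrp is NULL otherwise). The legacy descriptor formats carry no VLAN tag at all, which is why hunk seven above could simply drop the VLAN branch from nv_rx_process. A stubbed-down, compilable sketch of the resulting delivery path; the identifiers here are stand-ins for np->vlangrp, buflow and the NV_RX3_* masks:

#define likely(x) __builtin_expect(!!(x), 1)

struct rx_desc_ex {
	unsigned int buflow;  /* second descriptor word, holds the VLAN tag */
};

#define VLAN_TAG_PRESENT 0x00010000u  /* hypothetical bit layout */
#define VLAN_TAG_MASK    0x0000FFFFu

static void deliver(void) { /* netif_receive_skb(skb) in the driver */ }
static void deliver_vlan(unsigned int tag) { (void)tag; /* vlan_hwaccel_receive_skb(...) */ }

static void rx_deliver(const void *vlangrp, struct rx_desc_ex *d)
{
	if (likely(!vlangrp)) {
		/* common case: no VLAN group registered, so the buflow
		 * word of the descriptor is never even read */
		deliver();
	} else {
		unsigned int vlanflags = d->buflow;  /* the extra read, now rare */
		if (vlanflags & VLAN_TAG_PRESENT)
			deliver_vlan(vlanflags & VLAN_TAG_MASK);
		else
			deliver();
	}
}

int main(void)
{
	struct rx_desc_ex d = { VLAN_TAG_PRESENT | 42 };
	rx_deliver(0, &d);   /* fast path, descriptor word untouched */
	rx_deliver(&d, &d);  /* VLAN path, delivers tag 42 */
	return 0;
}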