Commit 9cde9450 authored by Felix Fietkau, committed by David S. Miller

bgmac: implement scatter/gather support



Always use software checksumming, since the hardware does not have any
checksum offload support.
This significantly improves local TCP tx performance.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 45c9b3c0
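A note on the checksum handling described in the commit message: the networking core drops NETIF_F_SG from a device that advertises no checksum feature, so the commit claims NETIF_F_IP_CSUM/NETIF_F_IPV6_CSUM even though this MAC has no checksum engine, then resolves CHECKSUM_PARTIAL skbs in software on the transmit path. That keeps scatter/gather usable and avoids linearizing paged skbs, which is where the TCP tx gain comes from. A minimal sketch of the pattern follows; my_xmit and its surrounding driver are hypothetical, not bgmac code:

	#include <linux/netdevice.h>
	#include <linux/skbuff.h>

	/* Hypothetical ndo_start_xmit showing the software-checksum
	 * fallback this commit uses. The stack may hand us skbs with an
	 * unfinished L4 checksum because we claimed NETIF_F_IP_CSUM at
	 * probe time; finish it on the CPU before building DMA
	 * descriptors, since the NIC has no checksum offload engine.
	 */
	static netdev_tx_t my_xmit(struct sk_buff *skb, struct net_device *dev)
	{
		if (skb->ip_summed == CHECKSUM_PARTIAL &&
		    skb_checksum_help(skb))
			goto drop;	/* checksum could not be computed */

		/* Scatter/gather: map skb_headlen(skb) bytes of the linear
		 * head, then one descriptor per page fragment in
		 * skb_shinfo(skb)->frags[], as bgmac_dma_tx_add() does below.
		 */
		return NETDEV_TX_OK;

	drop:
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

Unlike this sketch, the commit itself ignores skb_checksum_help()'s return value; checking it is merely the defensive variant.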
drivers/net/ethernet/broadcom/bgmac.c: +121 −43
@@ -115,53 +115,91 @@ static void bgmac_dma_tx_enable(struct bgmac *bgmac,
 	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, ctl);
 }
 
+static void
+bgmac_dma_tx_add_buf(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
+		     int i, int len, u32 ctl0)
+{
+	struct bgmac_slot_info *slot;
+	struct bgmac_dma_desc *dma_desc;
+	u32 ctl1;
+
+	if (i == ring->num_slots - 1)
+		ctl0 |= BGMAC_DESC_CTL0_EOT;
+
+	ctl1 = len & BGMAC_DESC_CTL1_LEN;
+
+	slot = &ring->slots[i];
+	dma_desc = &ring->cpu_base[i];
+	dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
+	dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
+	dma_desc->ctl0 = cpu_to_le32(ctl0);
+	dma_desc->ctl1 = cpu_to_le32(ctl1);
+}
+
 static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
 				    struct bgmac_dma_ring *ring,
 				    struct sk_buff *skb)
 {
 	struct device *dma_dev = bgmac->core->dma_dev;
 	struct net_device *net_dev = bgmac->net_dev;
-	struct bgmac_dma_desc *dma_desc;
-	struct bgmac_slot_info *slot;
-	u32 ctl0, ctl1;
+	struct bgmac_slot_info *slot = &ring->slots[ring->end];
 	int free_slots;
+	int nr_frags;
+	u32 flags;
+	int index = ring->end;
+	int i;
 
 	if (skb->len > BGMAC_DESC_CTL1_LEN) {
 		bgmac_err(bgmac, "Too long skb (%d)\n", skb->len);
-		goto err_stop_drop;
+		goto err_drop;
 	}
 
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		skb_checksum_help(skb);
+
+	nr_frags = skb_shinfo(skb)->nr_frags;
+
 	if (ring->start <= ring->end)
 		free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
 	else
 		free_slots = ring->start - ring->end;
-	if (free_slots == 1) {
+
+	if (free_slots <= nr_frags + 1) {
 		bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
 		netif_stop_queue(net_dev);
 		return NETDEV_TX_BUSY;
 	}
 
-	slot = &ring->slots[ring->end];
-	slot->skb = skb;
-	slot->dma_addr = dma_map_single(dma_dev, skb->data, skb->len,
+	slot->dma_addr = dma_map_single(dma_dev, skb->data, skb_headlen(skb),
 					DMA_TO_DEVICE);
-	if (dma_mapping_error(dma_dev, slot->dma_addr)) {
-		bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
-			  ring->mmio_base);
-		goto err_stop_drop;
-	}
+	if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
+		goto err_dma_head;
 
-	ctl0 = BGMAC_DESC_CTL0_IOC | BGMAC_DESC_CTL0_SOF | BGMAC_DESC_CTL0_EOF;
-	if (ring->end == ring->num_slots - 1)
-		ctl0 |= BGMAC_DESC_CTL0_EOT;
-	ctl1 = skb->len & BGMAC_DESC_CTL1_LEN;
+	flags = BGMAC_DESC_CTL0_SOF;
+	if (!nr_frags)
+		flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
 
-	dma_desc = ring->cpu_base;
-	dma_desc += ring->end;
-	dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
-	dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
-	dma_desc->ctl0 = cpu_to_le32(ctl0);
-	dma_desc->ctl1 = cpu_to_le32(ctl1);
+	bgmac_dma_tx_add_buf(bgmac, ring, index, skb_headlen(skb), flags);
+	flags = 0;
+
+	for (i = 0; i < nr_frags; i++) {
+		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
+		int len = skb_frag_size(frag);
+
+		index = (index + 1) % BGMAC_TX_RING_SLOTS;
+		slot = &ring->slots[index];
+		slot->dma_addr = skb_frag_dma_map(dma_dev, frag, 0,
+						  len, DMA_TO_DEVICE);
+		if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
+			goto err_dma;
+
+		if (i == nr_frags - 1)
+			flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
+
+		bgmac_dma_tx_add_buf(bgmac, ring, index, len, flags);
+	}
+
+	slot->skb = skb;
 
 	netdev_sent_queue(net_dev, skb->len);
 
@@ -170,20 +208,35 @@ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
 	/* Increase ring->end to point empty slot. We tell hardware the first
 	 * slot it should *not* read.
 	 */
-	if (++ring->end >= BGMAC_TX_RING_SLOTS)
-		ring->end = 0;
+	ring->end = (index + 1) % BGMAC_TX_RING_SLOTS;
 	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX,
 		    ring->index_base +
 		    ring->end * sizeof(struct bgmac_dma_desc));
 
-	/* Always keep one slot free to allow detecting bugged calls. */
-	if (--free_slots == 1)
+	free_slots -= nr_frags + 1;
+	if (free_slots < 8)
 		netif_stop_queue(net_dev);
 
 	return NETDEV_TX_OK;
 
-err_stop_drop:
-	netif_stop_queue(net_dev);
+err_dma:
+	dma_unmap_single(dma_dev, slot->dma_addr, skb_headlen(skb),
+			 DMA_TO_DEVICE);
+
+	while (i > 0) {
+		int index = (ring->end + i) % BGMAC_TX_RING_SLOTS;
+		struct bgmac_slot_info *slot = &ring->slots[index];
+		u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1);
+		int len = ctl1 & BGMAC_DESC_CTL1_LEN;
+
+		dma_unmap_page(dma_dev, slot->dma_addr, len, DMA_TO_DEVICE);
+	}
+
+err_dma_head:
+	bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
+		  ring->mmio_base);
+
+err_drop:
 	dev_kfree_skb(skb);
 	return NETDEV_TX_OK;
 }
@@ -205,32 +258,45 @@ static void bgmac_dma_tx_free(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
 
 	while (ring->start != empty_slot) {
 		struct bgmac_slot_info *slot = &ring->slots[ring->start];
+		u32 ctl1 = le32_to_cpu(ring->cpu_base[ring->start].ctl1);
+		int len = ctl1 & BGMAC_DESC_CTL1_LEN;
 
-		if (slot->skb) {
+		if (!slot->dma_addr) {
+			bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
+				  ring->start, ring->end);
+			goto next;
+		}
+
+		if (ctl1 & BGMAC_DESC_CTL0_SOF)
 			/* Unmap no longer used buffer */
-			dma_unmap_single(dma_dev, slot->dma_addr,
-					 slot->skb->len, DMA_TO_DEVICE);
-			slot->dma_addr = 0;
+			dma_unmap_single(dma_dev, slot->dma_addr, len,
+					 DMA_TO_DEVICE);
+		else
+			dma_unmap_page(dma_dev, slot->dma_addr, len,
+				       DMA_TO_DEVICE);
 
+		if (slot->skb) {
 			bytes_compl += slot->skb->len;
 			pkts_compl++;
 
 			/* Free memory! :) */
 			dev_kfree_skb(slot->skb);
 			slot->skb = NULL;
-		} else {
-			bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
-				  ring->start, ring->end);
 		}
 
+next:
+		slot->dma_addr = 0;
 		if (++ring->start >= BGMAC_TX_RING_SLOTS)
 			ring->start = 0;
 		freed = true;
 	}
 
+	if (!pkts_compl)
+		return;
+
 	netdev_completed_queue(bgmac->net_dev, pkts_compl, bytes_compl);
 
-	if (freed && netif_queue_stopped(bgmac->net_dev))
+	if (netif_queue_stopped(bgmac->net_dev))
 		netif_wake_queue(bgmac->net_dev);
 }
 
@@ -439,17 +505,25 @@ static void bgmac_dma_tx_ring_free(struct bgmac *bgmac,
 				   struct bgmac_dma_ring *ring)
 {
 	struct device *dma_dev = bgmac->core->dma_dev;
+	struct bgmac_dma_desc *dma_desc = ring->cpu_base;
 	struct bgmac_slot_info *slot;
 	int i;
 
 	for (i = 0; i < ring->num_slots; i++) {
+		int len = dma_desc[i].ctl1 & BGMAC_DESC_CTL1_LEN;
+
 		slot = &ring->slots[i];
-		if (slot->skb) {
-			if (slot->dma_addr)
-				dma_unmap_single(dma_dev, slot->dma_addr,
-						 slot->skb->len, DMA_TO_DEVICE);
-			dev_kfree_skb(slot->skb);
-		}
+		dev_kfree_skb(slot->skb);
+
+		if (!slot->dma_addr)
+			continue;
+
+		if (slot->skb)
+			dma_unmap_single(dma_dev, slot->dma_addr,
+					 len, DMA_TO_DEVICE);
+		else
+			dma_unmap_page(dma_dev, slot->dma_addr,
+				       len, DMA_TO_DEVICE);
 	}
 }
 
@@ -1583,6 +1657,10 @@ static int bgmac_probe(struct bcma_device *core)
 		goto err_dma_free;
 	}
 
+	net_dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+	net_dev->hw_features = net_dev->features;
+	net_dev->vlan_features = net_dev->features;
+
 	err = register_netdev(bgmac->net_dev);
 	if (err) {
 		bgmac_err(bgmac, "Cannot register net device\n");