Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 22adfe0a authored by Santosh Rastapur's avatar Santosh Rastapur Committed by David S. Miller
Browse files

cxgb4: Add T5 write combining support



This patch implements a low latency Write Combining (aka Write Coalescing) work
request path. PCIE maps User Space Doorbell BAR2 region writes to the new
interface to SGE. SGE pulls a new message from the PCIE new interface and, if it is a
coalesced write work request, pushes it for processing. This patch copies
coalesced work request to memory mapped BAR2 space.

Signed-off-by: Santosh Rastapur <santosh@chelsio.com>
Signed-off-by: Vipul Pandya <vipul@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 251f9e88
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -439,6 +439,7 @@ struct sge_txq {
	spinlock_t db_lock;
	int db_disabled;
	unsigned short db_pidx;
	u64 udb;
};

struct sge_eth_txq {                /* state for an SGE Ethernet Tx queue */
@@ -543,6 +544,7 @@ enum chip_type {

struct adapter {
	void __iomem *regs;
	void __iomem *bar2;
	struct pci_dev *pdev;
	struct device *pdev_dev;
	unsigned int mbox;
+51 −2
Original line number Diff line number Diff line
@@ -1327,6 +1327,8 @@ static char stats_strings[][ETH_GSTRING_LEN] = {
	"VLANinsertions     ",
	"GROpackets         ",
	"GROmerged          ",
	"WriteCoalSuccess   ",
	"WriteCoalFail      ",
};

static int get_sset_count(struct net_device *dev, int sset)
@@ -1422,11 +1424,25 @@ static void get_stats(struct net_device *dev, struct ethtool_stats *stats,
{
	struct port_info *pi = netdev_priv(dev);
	struct adapter *adapter = pi->adapter;
	u32 val1, val2;

	t4_get_port_stats(adapter, pi->tx_chan, (struct port_stats *)data);

	data += sizeof(struct port_stats) / sizeof(u64);
	collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
	data += sizeof(struct queue_port_stats) / sizeof(u64);
	if (!is_t4(adapter->chip)) {
		t4_write_reg(adapter, SGE_STAT_CFG, STATSOURCE_T5(7));
		val1 = t4_read_reg(adapter, SGE_STAT_TOTAL);
		val2 = t4_read_reg(adapter, SGE_STAT_MATCH);
		*data = val1 - val2;
		data++;
		*data = val2;
		data++;
	} else {
		memset(data, 0, 2 * sizeof(u64));
		*data += 2;
	}
}

/*
@@ -5337,10 +5353,11 @@ static void free_some_resources(struct adapter *adapter)
#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
#define VLAN_FEAT (NETIF_F_SG | NETIF_F_IP_CSUM | TSO_FLAGS | \
		   NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA)
#define SEGMENT_SIZE 128

static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	int func, i, err;
	int func, i, err, s_qpp, qpp, num_seg;
	struct port_info *pi;
	bool highdma = false;
	struct adapter *adapter = NULL;
@@ -5420,7 +5437,34 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)

	err = t4_prep_adapter(adapter);
	if (err)
		goto out_unmap_bar;
		goto out_unmap_bar0;

	if (!is_t4(adapter->chip)) {
		s_qpp = QUEUESPERPAGEPF1 * adapter->fn;
		qpp = 1 << QUEUESPERPAGEPF0_GET(t4_read_reg(adapter,
		      SGE_EGRESS_QUEUES_PER_PAGE_PF) >> s_qpp);
		num_seg = PAGE_SIZE / SEGMENT_SIZE;

		/* Each segment size is 128B. Write coalescing is enabled only
		 * when the SGE_EGRESS_QUEUES_PER_PAGE_PF register value for the
		 * queue is less than the number of segments that can be
		 * accommodated in a page.
		 */
		if (qpp > num_seg) {
			dev_err(&pdev->dev,
				"Incorrect number of egress queues per page\n");
			err = -EINVAL;
			goto out_unmap_bar0;
		}
		adapter->bar2 = ioremap_wc(pci_resource_start(pdev, 2),
		pci_resource_len(pdev, 2));
		if (!adapter->bar2) {
			dev_err(&pdev->dev, "cannot map device bar2 region\n");
			err = -ENOMEM;
			goto out_unmap_bar0;
		}
	}

	setup_memwin(adapter);
	err = adap_init0(adapter);
	setup_memwin_rdma(adapter);
@@ -5552,6 +5596,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 out_free_dev:
	free_some_resources(adapter);
 out_unmap_bar:
	if (!is_t4(adapter->chip))
		iounmap(adapter->bar2);
 out_unmap_bar0:
	iounmap(adapter->regs);
 out_free_adapter:
	kfree(adapter);
@@ -5602,6 +5649,8 @@ static void remove_one(struct pci_dev *pdev)

		free_some_resources(adapter);
		iounmap(adapter->regs);
		if (!is_t4(adapter->chip))
			iounmap(adapter->bar2);
		kfree(adapter);
		pci_disable_pcie_error_reporting(pdev);
		pci_disable_device(pdev);
+49 −3
Original line number Diff line number Diff line
@@ -816,6 +816,22 @@ static void write_sgl(const struct sk_buff *skb, struct sge_txq *q,
		*end = 0;
}

/*
 * Copy one 64-byte coalesced work request into the memory-mapped BAR2
 * region (the "user space doorbell" write path).  For a coalesced WR the
 * SGE fetches the request data from its FIFO instead of from host memory.
 */
static void cxgb_pio_copy(u64 __iomem *dst, u64 *src)
{
	int i;

	/* 64 bytes == eight 64-bit MMIO writes */
	for (i = 0; i < 8; i++)
		writeq(src[i], dst + i);
}

/**
 *	ring_tx_db - check and potentially ring a Tx queue's doorbell
 *	@adap: the adapter
@@ -826,11 +842,25 @@ static void write_sgl(const struct sk_buff *skb, struct sge_txq *q,
 */
static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
{
	unsigned int *wr, index;

	wmb();            /* write descriptors before telling HW */
	spin_lock(&q->db_lock);
	if (!q->db_disabled) {
		if (is_t4(adap->chip)) {
			t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL),
				     QID(q->cntxt_id) | PIDX(n));
		} else {
			if (n == 1) {
				index = q->pidx ? (q->pidx - 1) : (q->size - 1);
				wr = (unsigned int *)&q->desc[index];
				cxgb_pio_copy((u64 __iomem *)
					      (adap->bar2 + q->udb + 64),
					      (u64 *)wr);
			} else
				writel(n,  adap->bar2 + q->udb + 8);
			wmb();
		}
	}
	q->db_pidx = q->pidx;
	spin_unlock(&q->db_lock);
@@ -2151,11 +2181,27 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,

/*
 * Initialize the software state of an SGE Tx queue.  On T5 and later
 * chips this also computes q->udb, the per-queue offset into the BAR2
 * user-doorbell region used by the write-combined doorbell path.
 *
 * Fix: the original assigned q->cntxt_id = id twice (once before the
 * udb computation that needs it, and again redundantly near the end);
 * the duplicate assignment is removed.
 */
static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id)
{
	q->cntxt_id = id;	/* needed below for the udb calculation */
	if (!is_t4(adap->chip)) {
		unsigned int s_qpp;
		unsigned short udb_density;
		unsigned long qpshift;
		int page;

		/* Per-PF field of the register gives queues-per-page. */
		s_qpp = QUEUESPERPAGEPF1 * adap->fn;
		udb_density = 1 << QUEUESPERPAGEPF0_GET((t4_read_reg(adap,
				SGE_EGRESS_QUEUES_PER_PAGE_PF) >> s_qpp));
		qpshift = PAGE_SHIFT - ilog2(udb_density);
		/* Page containing this queue's doorbell ... */
		q->udb = q->cntxt_id << qpshift;
		q->udb &= PAGE_MASK;
		page = q->udb / PAGE_SIZE;
		/* ... plus the queue's 128-byte slot within that page. */
		q->udb += (q->cntxt_id - (page * udb_density)) * 128;
	}

	q->in_use = 0;
	q->cidx = q->pidx = 0;
	q->stops = q->restarts = 0;
	q->stat = (void *)&q->desc[q->size];
	spin_lock_init(&q->db_lock);
	adap->sge.egr_map[id - adap->sge.egr_start] = q;
}