Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit d63a6dcf authored by Hariprasad Shenai's avatar Hariprasad Shenai Committed by David S. Miller
Browse files

cxgb4: Use BAR2 Going To Sleep (GTS) for T5 and later.



Use BAR2 GTS for T5. If we are on T4 use the old doorbell mechanism;
otherwise ue the new BAR2 mechanism. Use BAR2 doorbells for refilling FL's.

Based on original work by Casey Leedom <leedom@chelsio.com>

Signed-off-by: default avatarHariprasad Shenai <hariprasad@chelsio.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 825bae5d
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -431,6 +431,7 @@ struct sge_fl { /* SGE free-buffer queue state */
	struct rx_sw_desc *sdesc;   /* address of SW Rx descriptor ring */
	__be64 *desc;               /* address of HW Rx descriptor ring */
	dma_addr_t addr;            /* bus address of HW ring start */
	u64 udb;                    /* BAR2 offset of User Doorbell area */
};

/* A packet gather list */
@@ -459,6 +460,7 @@ struct sge_rspq { /* state for an SGE response queue */
	u16 abs_id;                 /* absolute SGE id for the response q */
	__be64 *desc;               /* address of HW response ring */
	dma_addr_t phys_addr;       /* physical address of the ring */
	u64 udb;                    /* BAR2 offset of User Doorbell area */
	unsigned int iqe_len;       /* entry size */
	unsigned int size;          /* capacity of response queue */
	struct adapter *adap;
@@ -516,7 +518,7 @@ struct sge_txq {
	int db_disabled;
	unsigned short db_pidx;
	unsigned short db_pidx_inc;
	u64 udb;
	u64 udb;                    /* BAR2 offset of User Doorbell area */
};

struct sge_eth_txq {                /* state for an SGE Ethernet Tx queue */
+145 −41
Original line number Diff line number Diff line
@@ -521,9 +521,23 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
		val = PIDX(q->pend_cred / 8);
		if (!is_t4(adap->params.chip))
			val |= DBTYPE(1);
		val |= DBPRIO(1);
		wmb();
		t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), DBPRIO(1) |
			     QID(q->cntxt_id) | val);

		/* If we're on T4, use the old doorbell mechanism; otherwise
		 * use the new BAR2 mechanism.
		 */
		if (is_t4(adap->params.chip)) {
			t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL),
				     val | QID(q->cntxt_id));
		} else {
			writel(val,  adap->bar2 + q->udb + SGE_UDB_KDOORBELL);

			/* This Write memory Barrier will force the write to
			 * the User Doorbell area to be flushed.
			 */
			wmb();
		}
		q->pend_cred &= 7;
	}
}
@@ -859,30 +873,66 @@ static void cxgb_pio_copy(u64 __iomem *dst, u64 *src)
 */
static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
{
	unsigned int *wr, index;
	wmb();            /* write descriptors before telling HW */

	if (is_t4(adap->params.chip)) {
		u32 val = PIDX(n);
		unsigned long flags;

	wmb();            /* write descriptors before telling HW */
		/* For T4 we need to participate in the Doorbell Recovery
		 * mechanism.
		 */
		spin_lock_irqsave(&q->db_lock, flags);
	if (!q->db_disabled) {
		if (is_t4(adap->params.chip)) {
		if (!q->db_disabled)
			t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL),
				     QID(q->cntxt_id) | PIDX(n));
				     QID(q->cntxt_id) | val);
		else
			q->db_pidx_inc += n;
		q->db_pidx = q->pidx;
		spin_unlock_irqrestore(&q->db_lock, flags);
	} else {
		u32 val = PIDX_T5(n);

		/* T4 and later chips share the same PIDX field offset within
		 * the doorbell, but T5 and later shrank the field in order to
		 * gain a bit for Doorbell Priority.  The field was absurdly
		 * large in the first place (14 bits) so we just use the T5
		 * and later limits and warn if a Queue ID is too large.
		 */
		WARN_ON(val & DBPRIO(1));

		/* For T5 and later we use the Write-Combine mapped BAR2 User
		 * Doorbell mechanism.  If we're only writing a single TX
		 * Descriptor and TX Write Combining hasn't been disabled, we
		 * can use the Write Combining Gather Buffer; otherwise we use
		 * the simple doorbell.
		 */
		if (n == 1) {
				index = q->pidx ? (q->pidx - 1) : (q->size - 1);
				wr = (unsigned int *)&q->desc[index];
			int index = (q->pidx
				     ? (q->pidx - 1)
				     : (q->size - 1));
			unsigned int *wr = (unsigned int *)&q->desc[index];

			cxgb_pio_copy((u64 __iomem *)
					      (adap->bar2 + q->udb + 64),
				      (adap->bar2 + q->udb +
				       SGE_UDB_WCDOORBELL),
				      (u64 *)wr);
			} else
				writel(n,  adap->bar2 + q->udb + 8);
		} else {
			writel(val,  adap->bar2 + q->udb + SGE_UDB_KDOORBELL);
		}

		/* This Write Memory Barrier will force the write to the User
		 * Doorbell area to be flushed.  This is needed to prevent
		 * writes on different CPUs for the same queue from hitting
		 * the adapter out of order.  This is required when some Work
		 * Requests take the Write Combine Gather Buffer path (user
		 * doorbell area offset [SGE_UDB_WCDOORBELL..+63]) and some
		 * take the traditional path where we simply increment the
		 * PIDX (User Doorbell area SGE_UDB_KDOORBELL) and have the
		 * hardware DMA read the actual Work Request.
		 */
		wmb();
	}
	} else
		q->db_pidx_inc += n;
	q->db_pidx = q->pidx;
	spin_unlock_irqrestore(&q->db_lock, flags);
}

/**
@@ -1916,6 +1966,7 @@ static int napi_rx_handler(struct napi_struct *napi, int budget)
	unsigned int params;
	struct sge_rspq *q = container_of(napi, struct sge_rspq, napi);
	int work_done = process_responses(q, budget);
	u32 val;

	if (likely(work_done < budget)) {
		napi_complete(napi);
@@ -1924,8 +1975,14 @@ static int napi_rx_handler(struct napi_struct *napi, int budget)
	} else
		params = QINTR_TIMER_IDX(7);

	t4_write_reg(q->adap, MYPF_REG(SGE_PF_GTS), CIDXINC(work_done) |
		     INGRESSQID((u32)q->cntxt_id) | SEINTARM(params));
	val = CIDXINC(work_done) | SEINTARM(params);
	if (is_t4(q->adap->params.chip)) {
		t4_write_reg(q->adap, MYPF_REG(SGE_PF_GTS),
			     val | INGRESSQID((u32)q->cntxt_id));
	} else {
		writel(val, q->adap->bar2 + q->udb + SGE_UDB_GTS);
		wmb();
	}
	return work_done;
}

@@ -1949,6 +2006,7 @@ static unsigned int process_intrq(struct adapter *adap)
	unsigned int credits;
	const struct rsp_ctrl *rc;
	struct sge_rspq *q = &adap->sge.intrq;
	u32 val;

	spin_lock(&adap->sge.intrq_lock);
	for (credits = 0; ; credits++) {
@@ -1967,8 +2025,14 @@ static unsigned int process_intrq(struct adapter *adap)
		rspq_next(q);
	}

	t4_write_reg(adap, MYPF_REG(SGE_PF_GTS), CIDXINC(credits) |
		     INGRESSQID(q->cntxt_id) | SEINTARM(q->intr_params));
	val =  CIDXINC(credits) | SEINTARM(q->intr_params);
	if (is_t4(adap->params.chip)) {
		t4_write_reg(adap, MYPF_REG(SGE_PF_GTS),
			     val | INGRESSQID(q->cntxt_id));
	} else {
		writel(val, adap->bar2 + q->udb + SGE_UDB_GTS);
		wmb();
	}
	spin_unlock(&adap->sge.intrq_lock);
	return credits;
}
@@ -2149,6 +2213,51 @@ static void sge_tx_timer_cb(unsigned long data)
	mod_timer(&s->tx_timer, jiffies + (budget ? TX_QCHECK_PERIOD : 2));
}

/**
 *      udb_address - return the BAR2 User Doorbell address for a Queue
 *      @adap: the adapter
 *      @cntxt_id: the Queue Context ID
 *      @qpp: Queues Per Page (for all PFs)
 *
 *      Returns the BAR2 address of the user Doorbell associated with the
 *      indicated Queue Context ID.  Note that this is only applicable
 *      for T5 and later.
 */
static u64 udb_address(struct adapter *adap, unsigned int cntxt_id,
		       unsigned int qpp)
{
	u64 udb;
	unsigned int s_qpp;
	unsigned short udb_density;
	unsigned long qpshift;
	int page;

	BUG_ON(is_t4(adap->params.chip));

	s_qpp = (QUEUESPERPAGEPF0 +
		(QUEUESPERPAGEPF1 - QUEUESPERPAGEPF0) * adap->fn);
	udb_density = 1 << ((qpp >> s_qpp) & QUEUESPERPAGEPF0_MASK);
	qpshift = PAGE_SHIFT - ilog2(udb_density);
	udb = cntxt_id << qpshift;
	udb &= PAGE_MASK;
	page = udb / PAGE_SIZE;
	udb += (cntxt_id - (page * udb_density)) * SGE_UDB_SIZE;

	return udb;
}

static u64 udb_address_eq(struct adapter *adap, unsigned int cntxt_id)
{
	return udb_address(adap, cntxt_id,
			   t4_read_reg(adap, SGE_EGRESS_QUEUES_PER_PAGE_PF));
}

static u64 udb_address_iq(struct adapter *adap, unsigned int cntxt_id)
{
	return udb_address(adap, cntxt_id,
			   t4_read_reg(adap, SGE_INGRESS_QUEUES_PER_PAGE_PF));
}

int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
		     struct net_device *dev, int intr_idx,
		     struct sge_fl *fl, rspq_handler_t hnd)
@@ -2214,6 +2323,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
	iq->next_intr_params = iq->intr_params;
	iq->cntxt_id = ntohs(c.iqid);
	iq->abs_id = ntohs(c.physiqid);
	if (!is_t4(adap->params.chip))
		iq->udb = udb_address_iq(adap, iq->cntxt_id);
	iq->size--;                           /* subtract status entry */
	iq->netdev = dev;
	iq->handler = hnd;
@@ -2229,6 +2340,12 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
		fl->pidx = fl->cidx = 0;
		fl->alloc_failed = fl->large_alloc_failed = fl->starving = 0;
		adap->sge.egr_map[fl->cntxt_id - adap->sge.egr_start] = fl;

		/* Note, we must initialize the Free List User Doorbell
		 * address before refilling the Free List!
		 */
		if (!is_t4(adap->params.chip))
			fl->udb = udb_address_eq(adap, fl->cntxt_id);
		refill_fl(adap, fl, fl_cap(fl), GFP_KERNEL);
	}
	return 0;
@@ -2254,21 +2371,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id)
{
	q->cntxt_id = id;
	if (!is_t4(adap->params.chip)) {
		unsigned int s_qpp;
		unsigned short udb_density;
		unsigned long qpshift;
		int page;

		s_qpp = QUEUESPERPAGEPF1 * adap->fn;
		udb_density = 1 << QUEUESPERPAGEPF0_GET((t4_read_reg(adap,
				SGE_EGRESS_QUEUES_PER_PAGE_PF) >> s_qpp));
		qpshift = PAGE_SHIFT - ilog2(udb_density);
		q->udb = q->cntxt_id << qpshift;
		q->udb &= PAGE_MASK;
		page = q->udb / PAGE_SIZE;
		q->udb += (q->cntxt_id - (page * udb_density)) * 128;
	}
	if (!is_t4(adap->params.chip))
		q->udb = udb_address_eq(adap, q->cntxt_id);

	q->in_use = 0;
	q->cidx = q->pidx = 0;
+19 −0
Original line number Diff line number Diff line
@@ -157,8 +157,27 @@
#define  QUEUESPERPAGEPF0_MASK   0x0000000fU
#define  QUEUESPERPAGEPF0_GET(x) ((x) & QUEUESPERPAGEPF0_MASK)

#define QUEUESPERPAGEPF0    0
#define QUEUESPERPAGEPF1    4

/* T5 and later support a new BAR2-based doorbell mechanism for Egress Queues.
 * The User Doorbells are each 128 bytes in length with a Simple Doorbell at
 * offsets 8x and a Write Combining single 64-byte Egress Queue Unit
 * (X_IDXSIZE_UNIT) Gather Buffer interface at offset 64.  For Ingress Queues,
 * we have a Going To Sleep register at offsets 8x+4.
 *
 * As noted above, we have many instances of the Simple Doorbell and Going To
 * Sleep registers at offsets 8x and 8x+4, respectively.  We want to use a
 * non-64-byte aligned offset for the Simple Doorbell in order to attempt to
 * avoid buffering of the writes to the Simple Doorbell and we want to use a
 * non-contiguous offset for the Going To Sleep writes in order to avoid
 * possible combining between them.
 */
#define SGE_UDB_SIZE            128
#define SGE_UDB_KDOORBELL       8
#define SGE_UDB_GTS             20
#define SGE_UDB_WCDOORBELL      64

#define SGE_INT_CAUSE1 0x1024
#define SGE_INT_CAUSE2 0x1030
#define SGE_INT_CAUSE3 0x103c