Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3069ee9b authored by Vipul Pandya's avatar Vipul Pandya Committed by Roland Dreier
Browse files

cxgb4: DB Drop Recovery for RDMA and LLD queues



recover LLD EQs for DB drop interrupts.  This includes adding a new
db_lock, a spin lock disabling BH too, used by the recovery thread and
the ring_tx_db() paths to allow db drop recovery.

Clean up initial DB avoidance code.

Add read_eq_indices() - this allows the LLD to use the PCIe mw to
efficiently read hw eq contexts.

Add cxgb4_sync_txq_pidx() - called by iw_cxgb4 to sync up the sw/hw
pidx value.

Add flush_eq_cache() and cxgb4_flush_eq_cache().  This allows iw_cxgb4
to flush the sge eq context cache before beginning db drop recovery.

Add module parameter, dbfifo_int_thresh, to allow tuning the db
interrupt threshold value.

Add dbfifo_int_thresh to cxgb4_lld_info so iw_cxgb4 knows the threshold.

Add module parameter, dbfifo_drain_delay, to allow tuning the amount
of time delay between DB FULL and EMPTY upcalls to iw_cxgb4.

Signed-off-by: Vipul Pandya <vipul@chelsio.com>
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
parent 8caa1e84
Loading
Loading
Loading
Loading
+16 −0
Original line number Diff line number Diff line
@@ -51,6 +51,8 @@
#define FW_VERSION_MINOR 1
#define FW_VERSION_MICRO 0

#define CH_WARN(adap, fmt, ...) dev_warn(adap->pdev_dev, fmt, ## __VA_ARGS__)

enum {
	MAX_NPORTS = 4,     /* max # of ports */
	SERNUM_LEN = 24,    /* Serial # length */
@@ -64,6 +66,15 @@ enum {
	MEM_MC
};

/*
 * Aperture and base constants for the three adapter memory windows
 * (MEMWIN0..MEMWIN2).
 * NOTE(review): apertures are presumably sizes in bytes and bases are
 * presumably offsets into the adapter's register BAR -- confirm against
 * the code that programs the windows.
 */
enum {
	MEMWIN0_APERTURE = 65536,
	MEMWIN0_BASE     = 0x30000,
	MEMWIN1_APERTURE = 32768,
	MEMWIN1_BASE     = 0x28000,
	MEMWIN2_APERTURE = 2048,
	MEMWIN2_BASE     = 0x1b800,
};

enum dev_master {
	MASTER_CANT,
	MASTER_MAY,
@@ -403,6 +414,9 @@ struct sge_txq {
	struct tx_sw_desc *sdesc;   /* address of SW Tx descriptor ring */
	struct sge_qstat *stat;     /* queue status entry */
	dma_addr_t    phys_addr;    /* physical address of the ring */
	spinlock_t db_lock;
	int db_disabled;
	unsigned short db_pidx;
};

struct sge_eth_txq {                /* state for an SGE Ethernet Tx queue */
@@ -475,6 +489,7 @@ struct adapter {
	void __iomem *regs;
	struct pci_dev *pdev;
	struct device *pdev_dev;
	unsigned int mbox;
	unsigned int fn;
	unsigned int flags;

@@ -607,6 +622,7 @@ irqreturn_t t4_sge_intr_msix(int irq, void *cookie);
void t4_sge_init(struct adapter *adap);
void t4_sge_start(struct adapter *adap);
void t4_sge_stop(struct adapter *adap);
extern int dbfifo_int_thresh;

#define for_each_port(adapter, iter) \
	for (iter = 0; iter < (adapter)->params.nports; ++iter)
+176 −38
Original line number Diff line number Diff line
@@ -148,15 +148,6 @@ static unsigned int pfvfres_pmask(struct adapter *adapter,
}
#endif

enum {
	MEMWIN0_APERTURE = 65536,
	MEMWIN0_BASE     = 0x30000,
	MEMWIN1_APERTURE = 32768,
	MEMWIN1_BASE     = 0x28000,
	MEMWIN2_APERTURE = 2048,
	MEMWIN2_BASE     = 0x1b800,
};

enum {
	MAX_TXQ_ENTRIES      = 16384,
	MAX_CTRL_TXQ_ENTRIES = 1024,
@@ -371,6 +362,15 @@ static int set_addr_filters(const struct net_device *dev, bool sleep)
				uhash | mhash, sleep);
}

/*
 * Doorbell FIFO interrupt threshold.  The value is written into both the
 * HP and LP threshold fields of A_SGE_DBFIFO_STATUS by t4_sge_init() and is
 * handed to upper-layer drivers through cxgb4_lld_info.dbfifo_int_thresh.
 * Mode 0644: readable by everyone, writable by root.
 */
int dbfifo_int_thresh = 10; /* 10 == 640 entry threshold */
module_param(dbfifo_int_thresh, int, 0644);
MODULE_PARM_DESC(dbfifo_int_thresh, "doorbell fifo interrupt threshold");

/*
 * Sleep interval, in microseconds, passed to drain_db_fifo() while the
 * doorbell-full work item waits for the doorbell FIFO to empty.
 */
int dbfifo_drain_delay = 1000; /* usecs to sleep while draining the dbfifo */
module_param(dbfifo_drain_delay, int, 0644);
MODULE_PARM_DESC(dbfifo_drain_delay,
		 "usecs to sleep while draining the dbfifo");

/*
 * Set Rx properties of a port, such as promiscuity, address filters, and MTU.
 * If @mtu is -1 it is left unchanged.
@@ -389,6 +389,8 @@ static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
	return ret;
}

static struct workqueue_struct *workq;

/**
 *	link_start - enable a port
 *	@dev: the port to enable
@@ -2196,7 +2198,7 @@ static void cxgb4_queue_tid_release(struct tid_info *t, unsigned int chan,
	adap->tid_release_head = (void **)((uintptr_t)p | chan);
	if (!adap->tid_release_task_busy) {
		adap->tid_release_task_busy = true;
		schedule_work(&adap->tid_release_task);
		queue_work(workq, &adap->tid_release_task);
	}
	spin_unlock_bh(&adap->tid_release_lock);
}
@@ -2423,6 +2425,59 @@ void cxgb4_iscsi_init(struct net_device *dev, unsigned int tag_mask,
}
EXPORT_SYMBOL(cxgb4_iscsi_init);

/**
 *	cxgb4_flush_eq_cache - flush the SGE egress-queue context cache
 *	@dev: a net device belonging to the adapter
 *
 *	Asks the firmware, through the adapter's mailbox, to write the value
 *	0x20000000 to firmware address 0xe1000000 + A_SGE_CTXT_CMD.  Intended
 *	to be called before doorbell-drop recovery is started.
 *
 *	Returns whatever t4_fwaddrspace_write() returns.
 */
int cxgb4_flush_eq_cache(struct net_device *dev)
{
	struct adapter *adapter = netdev2adap(dev);

	return t4_fwaddrspace_write(adapter, adapter->mbox,
				    0xe1000000 + A_SGE_CTXT_CMD, 0x20000000);
}
EXPORT_SYMBOL(cxgb4_flush_eq_cache);

/*
 * read_eq_indices - fetch the hardware producer/consumer indices of an
 * egress queue
 * @adap: the adapter
 * @qid: egress queue context id
 * @pidx: where to store the hardware producer index
 * @cidx: where to store the hardware consumer index
 *
 * Reads 8 bytes of the queue's context through the memory window.  The
 * context address is the base held in A_SGE_DBQ_CTXT_BADDR plus 24 bytes per
 * queue, plus an offset of 8 within the entry.  The big-endian word is
 * converted to CPU order before the two 16-bit fields are extracted.
 *
 * Returns 0 on success, in which case *@pidx and *@cidx are valid, or the
 * error from t4_mem_win_read_len(), in which case they are left untouched.
 */
static int read_eq_indices(struct adapter *adap, u16 qid, u16 *pidx, u16 *cidx)
{
	u32 addr = t4_read_reg(adap, A_SGE_DBQ_CTXT_BADDR) + 24 * qid + 8;
	__be64 raw;	/* context word exactly as read from the adapter */
	u64 indices;	/* same word in CPU byte order */
	int ret;

	ret = t4_mem_win_read_len(adap, addr, (__be32 *)&raw, sizeof(raw));
	if (ret)
		return ret;

	/*
	 * Keep the wire-order and CPU-order values in separately typed
	 * variables so the endianness annotations stay truthful.
	 */
	indices = be64_to_cpu(raw);
	*cidx = (indices >> 25) & 0xffff;
	*pidx = (indices >> 9) & 0xffff;
	return 0;
}

/**
 *	cxgb4_sync_txq_pidx - bring the hardware producer index up to date
 *	@dev: a net device belonging to the adapter
 *	@qid: egress queue context id
 *	@pidx: the caller's (software) producer index
 *	@size: number of entries in the queue, used for wrap-around
 *
 *	Reads the hardware producer index of queue @qid and, if it differs
 *	from @pidx, rings the kernel doorbell with the number of entries the
 *	hardware is behind.
 *
 *	Returns 0 on success or the error from reading the queue context.
 */
int cxgb4_sync_txq_pidx(struct net_device *dev, u16 qid, u16 pidx,
			u16 size)
{
	struct adapter *adapter = netdev2adap(dev);
	u16 cur_pidx, cur_cidx;
	u16 gap;
	int err;

	err = read_eq_indices(adapter, qid, &cur_pidx, &cur_cidx);
	if (err)
		return err;

	/* Hardware already agrees with software: nothing to ring. */
	if (pidx == cur_pidx)
		return 0;

	/* Distance from the hardware index to ours, allowing for wrap. */
	gap = pidx > cur_pidx ? (u16)(pidx - cur_pidx)
			      : (u16)(size - cur_pidx + pidx);

	/* Order earlier memory writes ahead of the doorbell write. */
	wmb();
	t4_write_reg(adapter, MYPF_REG(A_SGE_PF_KDOORBELL),
		     V_QID(qid) | V_PIDX(gap));
	return 0;
}
EXPORT_SYMBOL(cxgb4_sync_txq_pidx);

static struct pci_driver cxgb4_driver;

static void check_neigh_update(struct neighbour *neigh)
@@ -2456,6 +2511,95 @@ static struct notifier_block cxgb4_netevent_nb = {
	.notifier_call = netevent_cb
};

/*
 * drain_db_fifo - wait until the doorbell FIFO is empty
 * @adap: the adapter
 * @usecs: how long to sleep between polls, in microseconds
 *
 * Sleeps uninterruptibly for @usecs and then reads A_SGE_DBFIFO_STATUS,
 * repeating until both the LP and HP counts read zero.  It always sleeps
 * at least once before the first check, and the wait has no upper bound.
 */
static void drain_db_fifo(struct adapter *adap, int usecs)
{
	u32 status;

	do {
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(usecs_to_jiffies(usecs));
		status = t4_read_reg(adap, A_SGE_DBFIFO_STATUS);
	} while (G_LP_COUNT(status) != 0 || G_HP_COUNT(status) != 0);
}

/* Store @disabled into @q->db_disabled while holding @q->db_lock. */
static void set_txq_db_disabled(struct sge_txq *q, int disabled)
{
	spin_lock_irq(&q->db_lock);
	q->db_disabled = disabled;
	spin_unlock_irq(&q->db_lock);
}

/*
 * Mark @q so that ring_tx_db() skips the doorbell register write and only
 * records the producer index.
 */
static void disable_txq_db(struct sge_txq *q)
{
	set_txq_db_disabled(q, 1);
}

/* Clear the flag set by disable_txq_db() so doorbells are rung again. */
static void enable_txq_db(struct sge_txq *q)
{
	set_txq_db_disabled(q, 0);
}

/*
 * disable_dbs - stop doorbell writes on every Ethernet, offload and control
 * TX queue of the adapter
 * @adap: the adapter
 *
 * NOTE(review): the Ethernet and offload TX queue arrays are walked with the
 * RX-queue iterators; presumably the RX and TX queue counts match -- confirm.
 */
static void disable_dbs(struct adapter *adap)
{
	struct sge *s = &adap->sge;
	int qidx;

	for_each_ethrxq(s, qidx)
		disable_txq_db(&s->ethtxq[qidx].q);
	for_each_ofldrxq(s, qidx)
		disable_txq_db(&s->ofldtxq[qidx].q);
	for_each_port(adap, qidx)
		disable_txq_db(&s->ctrlq[qidx].q);
}

/*
 * enable_dbs - allow doorbell writes again on every Ethernet, offload and
 * control TX queue of the adapter
 * @adap: the adapter
 *
 * Visits the same queues, in the same order, as disable_dbs().
 */
static void enable_dbs(struct adapter *adap)
{
	struct sge *s = &adap->sge;
	int qidx;

	for_each_ethrxq(s, qidx)
		enable_txq_db(&s->ethtxq[qidx].q);
	for_each_ofldrxq(s, qidx)
		enable_txq_db(&s->ofldtxq[qidx].q);
	for_each_port(adap, qidx)
		enable_txq_db(&s->ctrlq[qidx].q);
}

static void sync_txq_pidx(struct adapter *adap, struct sge_txq *q)
{
	u16 hw_pidx, hw_cidx;
	int ret;

	spin_lock_bh(&q->db_lock);
	ret = read_eq_indices(adap, (u16)q->cntxt_id, &hw_pidx, &hw_cidx);
	if (ret)
		goto out;
	if (q->db_pidx != hw_pidx) {
		u16 delta;

		if (q->db_pidx >= hw_pidx)
			delta = q->db_pidx - hw_pidx;
		else
			delta = q->size - hw_pidx + q->db_pidx;
		wmb();
		t4_write_reg(adap, MYPF_REG(A_SGE_PF_KDOORBELL),
				V_QID(q->cntxt_id) | V_PIDX(delta));
	}
out:
	q->db_disabled = 0;
	spin_unlock_bh(&q->db_lock);
	if (ret)
		CH_WARN(adap, "DB drop recovery failed.\n");
}
/*
 * recover_all_queues - run sync_txq_pidx() on every Ethernet, offload and
 * control TX queue of the adapter
 * @adap: the adapter
 */
static void recover_all_queues(struct adapter *adap)
{
	struct sge *s = &adap->sge;
	int qidx;

	for_each_ethrxq(s, qidx)
		sync_txq_pidx(adap, &s->ethtxq[qidx].q);
	for_each_ofldrxq(s, qidx)
		sync_txq_pidx(adap, &s->ofldtxq[qidx].q);
	for_each_port(adap, qidx)
		sync_txq_pidx(adap, &s->ctrlq[qidx].q);
}

static void notify_rdma_uld(struct adapter *adap, enum cxgb4_control cmd)
{
	mutex_lock(&uld_mutex);
@@ -2468,55 +2612,41 @@ static void notify_rdma_uld(struct adapter *adap, enum cxgb4_control cmd)
static void process_db_full(struct work_struct *work)
{
	struct adapter *adap;
	static int delay = 1000;
	u32 v;

	adap = container_of(work, struct adapter, db_full_task);


	/* stop LLD queues */

	notify_rdma_uld(adap, CXGB4_CONTROL_DB_FULL);
	do {
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(usecs_to_jiffies(delay));
		v = t4_read_reg(adap, A_SGE_DBFIFO_STATUS);
		if (G_LP_COUNT(v) == 0 && G_HP_COUNT(v) == 0)
			break;
	} while (1);
	drain_db_fifo(adap, dbfifo_drain_delay);
	t4_set_reg_field(adap, A_SGE_INT_ENABLE3,
			F_DBFIFO_HP_INT | F_DBFIFO_LP_INT,
			F_DBFIFO_HP_INT | F_DBFIFO_LP_INT);
	notify_rdma_uld(adap, CXGB4_CONTROL_DB_EMPTY);


	/*
	 * The more we get db full interrupts, the more we'll delay
	 * in re-enabling db rings on queues, capped off at 200ms.
	 */
	delay = min(delay << 1, 200000);

	/* resume LLD queues */
}

static void process_db_drop(struct work_struct *work)
{
	struct adapter *adap;
	adap = container_of(work, struct adapter, db_drop_task);

	adap = container_of(work, struct adapter, db_drop_task);

	/*
	 * sync the PIDX values in HW and SW for LLD queues.
	 */

	t4_set_reg_field(adap, A_SGE_DOORBELL_CONTROL, F_DROPPED_DB, 0);
	disable_dbs(adap);
	notify_rdma_uld(adap, CXGB4_CONTROL_DB_DROP);
	drain_db_fifo(adap, 1);
	recover_all_queues(adap);
	enable_dbs(adap);
}

void t4_db_full(struct adapter *adap)
{
	schedule_work(&adap->db_full_task);
	t4_set_reg_field(adap, A_SGE_INT_ENABLE3,
			F_DBFIFO_HP_INT | F_DBFIFO_LP_INT, 0);
	queue_work(workq, &adap->db_full_task);
}

void t4_db_dropped(struct adapter *adap)
{
	schedule_work(&adap->db_drop_task);
	queue_work(workq, &adap->db_drop_task);
}

static void uld_attach(struct adapter *adap, unsigned int uld)
@@ -2552,6 +2682,7 @@ static void uld_attach(struct adapter *adap, unsigned int uld)
	lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS);
	lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL);
	lli.fw_vers = adap->params.fw_vers;
	lli.dbfifo_int_thresh = dbfifo_int_thresh;

	handle = ulds[uld].add(&lli);
	if (IS_ERR(handle)) {
@@ -3668,6 +3799,7 @@ static int __devinit init_one(struct pci_dev *pdev,

	adapter->pdev = pdev;
	adapter->pdev_dev = &pdev->dev;
	adapter->mbox = func;
	adapter->fn = func;
	adapter->msg_enable = dflt_msg_enable;
	memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));
@@ -3865,6 +3997,10 @@ static int __init cxgb4_init_module(void)
{
	int ret;

	workq = create_singlethread_workqueue("cxgb4");
	if (!workq)
		return -ENOMEM;

	/* Debugfs support is optional, just warn if this fails */
	cxgb4_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
	if (!cxgb4_debugfs_root)
@@ -3880,6 +4016,8 @@ static void __exit cxgb4_cleanup_module(void)
{
	pci_unregister_driver(&cxgb4_driver);
	debugfs_remove(cxgb4_debugfs_root);  /* NULL ok */
	flush_workqueue(workq);
	destroy_workqueue(workq);
}

module_init(cxgb4_init_module);
+4 −0
Original line number Diff line number Diff line
@@ -218,6 +218,7 @@ struct cxgb4_lld_info {
	unsigned short ucq_density;          /* # of user CQs/page */
	void __iomem *gts_reg;               /* address of GTS register */
	void __iomem *db_reg;                /* address of kernel doorbell */
	int dbfifo_int_thresh;		     /* doorbell fifo int threshold */
};

struct cxgb4_uld_info {
@@ -226,6 +227,7 @@ struct cxgb4_uld_info {
	int (*rx_handler)(void *handle, const __be64 *rsp,
			  const struct pkt_gl *gl);
	int (*state_change)(void *handle, enum cxgb4_state new_state);
	int (*control)(void *handle, enum cxgb4_control control, ...);
};

int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p);
@@ -243,4 +245,6 @@ void cxgb4_iscsi_init(struct net_device *dev, unsigned int tag_mask,
		      const unsigned int *pgsz_order);
struct sk_buff *cxgb4_pktgl_to_skb(const struct pkt_gl *gl,
				   unsigned int skb_len, unsigned int pull_len);
int cxgb4_sync_txq_pidx(struct net_device *dev, u16 qid, u16 pidx, u16 size);
int cxgb4_flush_eq_cache(struct net_device *dev);
#endif  /* !__CXGB4_OFLD_H */
+16 −4
Original line number Diff line number Diff line
@@ -767,8 +767,13 @@ static void write_sgl(const struct sk_buff *skb, struct sge_txq *q,
static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
{
	wmb();            /* write descriptors before telling HW */
	t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL),
		     QID(q->cntxt_id) | PIDX(n));
	spin_lock(&q->db_lock);
	if (!q->db_disabled) {
		t4_write_reg(adap, MYPF_REG(A_SGE_PF_KDOORBELL),
			     V_QID(q->cntxt_id) | V_PIDX(n));
	}
	q->db_pidx = q->pidx;
	spin_unlock(&q->db_lock);
}

/**
@@ -2081,6 +2086,7 @@ static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id)
	q->stops = q->restarts = 0;
	q->stat = (void *)&q->desc[q->size];
	q->cntxt_id = id;
	spin_lock_init(&q->db_lock);
	adap->sge.egr_map[id - adap->sge.egr_start] = q;
}

@@ -2415,9 +2421,15 @@ void t4_sge_init(struct adapter *adap)
			 RXPKTCPLMODE |
			 (STAT_LEN == 128 ? EGRSTATUSPAGESIZE : 0));

	/*
	 * Set up to drop DOORBELL writes when the DOORBELL FIFO overflows
	 * and generate an interrupt when this occurs so we can recover.
	 */
	t4_set_reg_field(adap, A_SGE_DBFIFO_STATUS,
			V_HP_INT_THRESH(5) | V_LP_INT_THRESH(5),
			V_HP_INT_THRESH(5) | V_LP_INT_THRESH(5));
			V_HP_INT_THRESH(M_HP_INT_THRESH) |
			V_LP_INT_THRESH(M_LP_INT_THRESH),
			V_HP_INT_THRESH(dbfifo_int_thresh) |
			V_LP_INT_THRESH(dbfifo_int_thresh));
	t4_set_reg_field(adap, A_SGE_DOORBELL_CONTROL, F_ENABLE_DROP,
			F_ENABLE_DROP);

+53 −0
Original line number Diff line number Diff line
@@ -190,6 +190,59 @@
#define SGE_DEBUG_DATA_LOW 0x10d4
#define SGE_INGRESS_QUEUES_PER_PAGE_PF 0x10f4

/*
 * Doorbell-recovery register and field definitions.  Naming convention, as
 * used by the definitions below:
 *   A_<reg>      register address
 *   S_<field>    bit position (shift) of a field within its register
 *   M_<field>    mask of a field, before shifting
 *   V_<field>(x) value x placed at the field's position
 *   F_<field>    single-bit field set to 1
 *   G_<field>(x) field value extracted from register value x
 */
#define S_LP_INT_THRESH    12
#define V_LP_INT_THRESH(x) ((x) << S_LP_INT_THRESH)
#define S_HP_INT_THRESH    28
#define V_HP_INT_THRESH(x) ((x) << S_HP_INT_THRESH)
#define A_SGE_DBFIFO_STATUS 0x10a4

#define S_ENABLE_DROP    13
#define V_ENABLE_DROP(x) ((x) << S_ENABLE_DROP)
#define F_ENABLE_DROP    V_ENABLE_DROP(1U)
#define A_SGE_DOORBELL_CONTROL 0x10a8

#define A_SGE_CTXT_CMD 0x11fc
#define A_SGE_DBQ_CTXT_BADDR 0x1084

#define A_SGE_PF_KDOORBELL 0x0

#define S_QID 15
#define V_QID(x) ((x) << S_QID)

#define S_PIDX 0
#define V_PIDX(x) ((x) << S_PIDX)

#define M_LP_COUNT 0x7ffU
#define S_LP_COUNT 0
#define G_LP_COUNT(x) (((x) >> S_LP_COUNT) & M_LP_COUNT)

#define M_HP_COUNT 0x7ffU
#define S_HP_COUNT 16
#define G_HP_COUNT(x) (((x) >> S_HP_COUNT) & M_HP_COUNT)

#define A_SGE_INT_ENABLE3 0x1040

#define S_DBFIFO_HP_INT 8
#define V_DBFIFO_HP_INT(x) ((x) << S_DBFIFO_HP_INT)
#define F_DBFIFO_HP_INT V_DBFIFO_HP_INT(1U)

#define S_DBFIFO_LP_INT 7
#define V_DBFIFO_LP_INT(x) ((x) << S_DBFIFO_LP_INT)
#define F_DBFIFO_LP_INT V_DBFIFO_LP_INT(1U)

#define S_DROPPED_DB 0
#define V_DROPPED_DB(x) ((x) << S_DROPPED_DB)
#define F_DROPPED_DB V_DROPPED_DB(1U)

#define S_ERR_DROPPED_DB 18
#define V_ERR_DROPPED_DB(x) ((x) << S_ERR_DROPPED_DB)
#define F_ERR_DROPPED_DB V_ERR_DROPPED_DB(1U)

#define A_PCIE_MEM_ACCESS_OFFSET 0x306c

#define M_HP_INT_THRESH 0xfU
#define M_LP_INT_THRESH 0xfU

#define PCIE_PF_CLI 0x44
#define PCIE_INT_CAUSE 0x3004
#define  UNXSPLCPLERR  0x20000000U
Loading