Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5da0fc9d authored by Dennis Dalessandro's avatar Dennis Dalessandro Committed by Jason Gunthorpe
Browse files

IB/hfi1: Prepare resource waits for dual leg



Current implementation allows each qp to have only one send engine.  As
such, each qp has only one list to queue prebuilt packets when send engine
resources are not available. To improve performance, it is desired to
support multiple send engines for each qp.

This patch creates the framework to support two send engines
(two legs) for each qp for the TID RDMA protocol, which can be easily
extended to support more send engines. It achieves the goal by creating a
leg specific struct, iowait_work in the iowait struct, to hold the
work_struct and the tx_list as well as a pointer to the parent iowait
struct.

The hfi1_pkt_state now has an additional field to record the current legs
work structure and that is now passed to all egress waiters to determine
the leg that needs to wait via a new iowait helper.  The APIs are adjusted
to use the new leg specific struct as required.

Many new and modified helpers are added to support this change.

Reviewed-by: default avatarMitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: default avatarMike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: default avatarKaike Wan <kaike.wan@intel.com>
Signed-off-by: default avatarDennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@mellanox.com>
parent d205a06a
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@ hfi1-y := \
	firmware.o \
	init.o \
	intr.o \
	iowait.o \
	mad.o \
	mmu_rb.o \
	msix.o \
+91 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
 * Copyright(c) 2018 Intel Corporation.
 *
 */
#include "iowait.h"

void iowait_set_flag(struct iowait *wait, u32 flag)
{
	set_bit(flag, &wait->flags);
}

bool iowait_flag_set(struct iowait *wait, u32 flag)
{
	return test_bit(flag, &wait->flags);
}

inline void iowait_clear_flag(struct iowait *wait, u32 flag)
{
	clear_bit(flag, &wait->flags);
}

/**
 * iowait_init() - initialize wait structure
 * @wait: wait struct to initialize
 * @tx_limit: limit for overflow queuing
 * @func: restart function for workqueue
 * @sleep: sleep function for no space
 * @resume: wakeup function for no space
 *
 * This function initializes the iowait
 * structure embedded in the QP or PQ.
 *
 */
void iowait_init(struct iowait *wait, u32 tx_limit,
		 void (*func)(struct work_struct *work),
		 void (*tidfunc)(struct work_struct *work),
		 int (*sleep)(struct sdma_engine *sde,
			      struct iowait_work *wait,
			      struct sdma_txreq *tx,
			      uint seq,
			      bool pkts_sent),
		 void (*wakeup)(struct iowait *wait, int reason),
		 void (*sdma_drained)(struct iowait *wait))
{
	int i;

	wait->count = 0;
	INIT_LIST_HEAD(&wait->list);
	init_waitqueue_head(&wait->wait_dma);
	init_waitqueue_head(&wait->wait_pio);
	atomic_set(&wait->sdma_busy, 0);
	atomic_set(&wait->pio_busy, 0);
	wait->tx_limit = tx_limit;
	wait->sleep = sleep;
	wait->wakeup = wakeup;
	wait->sdma_drained = sdma_drained;
	wait->flags = 0;
	for (i = 0; i < IOWAIT_SES; i++) {
		wait->wait[i].iow = wait;
		INIT_LIST_HEAD(&wait->wait[i].tx_head);
		if (i == IOWAIT_IB_SE)
			INIT_WORK(&wait->wait[i].iowork, func);
		else
			INIT_WORK(&wait->wait[i].iowork, tidfunc);
	}
}

/**
 * iowait_cancel_work - cancel all work in iowait
 * @w: the iowait struct
 */
void iowait_cancel_work(struct iowait *w)
{
	cancel_work_sync(&iowait_get_ib_work(w)->iowork);
	cancel_work_sync(&iowait_get_tid_work(w)->iowork);
}

/**
 * iowait_set_work_flag - set work flag based on leg
 * @w - the iowait work struct
 */
int iowait_set_work_flag(struct iowait_work *w)
{
	if (w == &w->iow->wait[IOWAIT_IB_SE]) {
		iowait_set_flag(w->iow, IOWAIT_PENDING_IB);
		return IOWAIT_IB_SE;
	}
	iowait_set_flag(w->iow, IOWAIT_PENDING_TID);
	return IOWAIT_TID_SE;
}
+134 −58
Original line number Diff line number Diff line
#ifndef _HFI1_IOWAIT_H
#define _HFI1_IOWAIT_H
/*
 * Copyright(c) 2015, 2016 Intel Corporation.
 * Copyright(c) 2015 - 2018 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
@@ -49,6 +49,7 @@

#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/sched.h>

#include "sdma_txreq.h"
@@ -59,16 +60,47 @@
 */
typedef void (*restart_t)(struct work_struct *work);

#define IOWAIT_PENDING_IB  0x0
#define IOWAIT_PENDING_TID 0x1

/*
 * A QP can have multiple Send Engines (SEs).
 *
 * The current use case is for supporting a TID RDMA
 * packet build/xmit mechanism independent from verbs.
 */
#define IOWAIT_SES 2
#define IOWAIT_IB_SE 0
#define IOWAIT_TID_SE 1

struct sdma_txreq;
struct sdma_engine;
/**
 * struct iowait - linkage for delayed progress/waiting
 * @iowork: the work struct
 * @tx_head: list of prebuilt packets
 * @iow: the parent iowait structure
 *
 * This structure is the work item (process) specific
 * details associated with the each of the two SEs of the
 * QP.
 *
 * The workstruct and the queued TXs are unique to each
 * SE.
 */
struct iowait;
struct iowait_work {
	struct work_struct iowork;
	struct list_head tx_head;
	struct iowait *iow;
};

/**
 * @list: used to add/insert into QP/PQ wait lists
 * @lock: uses to record the list head lock
 * @tx_head: overflow list of sdma_txreq's
 * @sleep: no space callback
 * @wakeup: space callback wakeup
 * @sdma_drained: sdma count drained
 * @lock: lock protected head of wait queue
 * @iowork: workqueue overhead
 * @wait_dma: wait for sdma_busy == 0
 * @wait_pio: wait for pio_busy == 0
@@ -76,6 +108,8 @@ struct sdma_engine;
 * @count: total number of descriptors in tx_head'ed list
 * @tx_limit: limit for overflow queuing
 * @tx_count: number of tx entry's in tx_head'ed list
 * @flags: wait flags (one per QP)
 * @wait: SE array
 *
 * This is to be embedded in user's state structure
 * (QP or PQ).
@@ -98,13 +132,11 @@ struct sdma_engine;
 * Waiters explicity know that, but the destroy
 * code that unwaits QPs does not.
 */

struct iowait {
	struct list_head list;
	struct list_head tx_head;
	int (*sleep)(
		struct sdma_engine *sde,
		struct iowait *wait,
		struct iowait_work *wait,
		struct sdma_txreq *tx,
		uint seq,
		bool pkts_sent
@@ -112,7 +144,6 @@ struct iowait {
	void (*wakeup)(struct iowait *wait, int reason);
	void (*sdma_drained)(struct iowait *wait);
	seqlock_t *lock;
	struct work_struct iowork;
	wait_queue_head_t wait_dma;
	wait_queue_head_t wait_pio;
	atomic_t sdma_busy;
@@ -121,63 +152,37 @@ struct iowait {
	u32 tx_limit;
	u32 tx_count;
	u8 starved_cnt;
	unsigned long flags;
	struct iowait_work wait[IOWAIT_SES];
};

#define SDMA_AVAIL_REASON 0

/**
 * iowait_init() - initialize wait structure
 * @wait: wait struct to initialize
 * @tx_limit: limit for overflow queuing
 * @func: restart function for workqueue
 * @sleep: sleep function for no space
 * @resume: wakeup function for no space
 *
 * This function initializes the iowait
 * structure embedded in the QP or PQ.
 *
 */
void iowait_set_flag(struct iowait *wait, u32 flag);
bool iowait_flag_set(struct iowait *wait, u32 flag);
void iowait_clear_flag(struct iowait *wait, u32 flag);

static inline void iowait_init(
	struct iowait *wait,
	u32 tx_limit,
void iowait_init(struct iowait *wait, u32 tx_limit,
		 void (*func)(struct work_struct *work),
	int (*sleep)(
		struct sdma_engine *sde,
		struct iowait *wait,
		 void (*tidfunc)(struct work_struct *work),
		 int (*sleep)(struct sdma_engine *sde,
			      struct iowait_work *wait,
			      struct sdma_txreq *tx,
			      uint seq,
			      bool pkts_sent),
		 void (*wakeup)(struct iowait *wait, int reason),
	void (*sdma_drained)(struct iowait *wait))
{
	wait->count = 0;
	wait->lock = NULL;
	INIT_LIST_HEAD(&wait->list);
	INIT_LIST_HEAD(&wait->tx_head);
	INIT_WORK(&wait->iowork, func);
	init_waitqueue_head(&wait->wait_dma);
	init_waitqueue_head(&wait->wait_pio);
	atomic_set(&wait->sdma_busy, 0);
	atomic_set(&wait->pio_busy, 0);
	wait->tx_limit = tx_limit;
	wait->sleep = sleep;
	wait->wakeup = wakeup;
	wait->sdma_drained = sdma_drained;
}
		 void (*sdma_drained)(struct iowait *wait));

/**
 * iowait_schedule() - initialize wait structure
 * iowait_schedule() - schedule the default send engine work
 * @wait: wait struct to schedule
 * @wq: workqueue for schedule
 * @cpu: cpu
 */
static inline void iowait_schedule(
	struct iowait *wait,
	struct workqueue_struct *wq,
	int cpu)
static inline bool iowait_schedule(struct iowait *wait,
				   struct workqueue_struct *wq, int cpu)
{
	queue_work_on(cpu, wq, &wait->iowork);
	return !!queue_work_on(cpu, wq, &wait->wait[IOWAIT_IB_SE].iowork);
}

/**
@@ -228,6 +233,8 @@ static inline void iowait_sdma_add(struct iowait *wait, int count)
 */
static inline int iowait_sdma_dec(struct iowait *wait)
{
	if (!wait)
		return 0;
	return atomic_dec_and_test(&wait->sdma_busy);
}

@@ -267,11 +274,13 @@ static inline void iowait_pio_inc(struct iowait *wait)
}

/**
 * iowait_sdma_dec - note pio complete
 * iowait_pio_dec - note pio complete
 * @wait: iowait structure
 */
static inline int iowait_pio_dec(struct iowait *wait)
{
	if (!wait)
		return 0;
	return atomic_dec_and_test(&wait->pio_busy);
}

@@ -293,9 +302,9 @@ static inline void iowait_drain_wakeup(struct iowait *wait)
/**
 * iowait_get_txhead() - get packet off of iowait list
 *
 * @wait wait struture
 * @wait iowait_work struture
 */
static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
static inline struct sdma_txreq *iowait_get_txhead(struct iowait_work *wait)
{
	struct sdma_txreq *tx = NULL;

@@ -309,6 +318,28 @@ static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
	return tx;
}

static inline u16 iowait_get_desc(struct iowait_work *w)
{
	u16 num_desc = 0;
	struct sdma_txreq *tx = NULL;

	if (!list_empty(&w->tx_head)) {
		tx = list_first_entry(&w->tx_head, struct sdma_txreq,
				      list);
		num_desc = tx->num_desc;
	}
	return num_desc;
}

static inline u32 iowait_get_all_desc(struct iowait *w)
{
	u32 num_desc = 0;

	num_desc = iowait_get_desc(&w->wait[IOWAIT_IB_SE]);
	num_desc += iowait_get_desc(&w->wait[IOWAIT_TID_SE]);
	return num_desc;
}

/**
 * iowait_queue - Put the iowait on a wait queue
 * @pkts_sent: have some packets been sent before queuing?
@@ -372,12 +403,57 @@ static inline void iowait_starve_find_max(struct iowait *w, u8 *max,
}

/**
 * iowait_packet_queued() - determine if a packet is already built
 * @wait: the wait structure
 * iowait_packet_queued() - determine if a packet is queued
 * @wait: the iowait_work structure
 */
static inline bool iowait_packet_queued(struct iowait *wait)
static inline bool iowait_packet_queued(struct iowait_work *wait)
{
	return !list_empty(&wait->tx_head);
}

/**
 * inc_wait_count - increment wait counts
 * @w: the log work struct
 * @n: the count
 */
static inline void iowait_inc_wait_count(struct iowait_work *w, u16 n)
{
	if (!w)
		return;
	w->iow->tx_count++;
	w->iow->count += n;
}

/**
 * iowait_get_tid_work - return iowait_work for tid SE
 * @w: the iowait struct
 */
static inline struct iowait_work *iowait_get_tid_work(struct iowait *w)
{
	return &w->wait[IOWAIT_TID_SE];
}

/**
 * iowait_get_ib_work - return iowait_work for ib SE
 * @w: the iowait struct
 */
static inline struct iowait_work *iowait_get_ib_work(struct iowait *w)
{
	return &w->wait[IOWAIT_IB_SE];
}

/**
 * iowait_ioww_to_iow - return iowait given iowait_work
 * @w: the iowait_work struct
 */
static inline struct iowait *iowait_ioww_to_iow(struct iowait_work *w)
{
	if (likely(w))
		return w->iow;
	return NULL;
}

void iowait_cancel_work(struct iowait *w);
int iowait_set_work_flag(struct iowait_work *w);

#endif
+50 −17
Original line number Diff line number Diff line
@@ -66,7 +66,7 @@ MODULE_PARM_DESC(qp_table_size, "QP table size");
static void flush_tx_list(struct rvt_qp *qp);
static int iowait_sleep(
	struct sdma_engine *sde,
	struct iowait *wait,
	struct iowait_work *wait,
	struct sdma_txreq *stx,
	unsigned int seq,
	bool pkts_sent);
@@ -134,15 +134,13 @@ const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {

};

static void flush_tx_list(struct rvt_qp *qp)
static void flush_list_head(struct list_head *l)
{
	struct hfi1_qp_priv *priv = qp->priv;

	while (!list_empty(&priv->s_iowait.tx_head)) {
	while (!list_empty(l)) {
		struct sdma_txreq *tx;

		tx = list_first_entry(
			&priv->s_iowait.tx_head,
			l,
			struct sdma_txreq,
			list);
		list_del_init(&tx->list);
@@ -151,6 +149,14 @@ static void flush_tx_list(struct rvt_qp *qp)
	}
}

static void flush_tx_list(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	flush_list_head(&iowait_get_ib_work(&priv->s_iowait)->tx_head);
	flush_list_head(&iowait_get_tid_work(&priv->s_iowait)->tx_head);
}

static void flush_iowait(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
@@ -336,7 +342,7 @@ int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
 * It is only used in the post send, which doesn't hold
 * the s_lock.
 */
void _hfi1_schedule_send(struct rvt_qp *qp)
bool _hfi1_schedule_send(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_ibport *ibp =
@@ -344,7 +350,7 @@ void _hfi1_schedule_send(struct rvt_qp *qp)
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);

	iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
	return iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
			       priv->s_sde ?
			       priv->s_sde->cpu :
			       cpumask_first(cpumask_of_node(dd->node)));
@@ -375,12 +381,32 @@ static void qp_pio_drain(struct rvt_qp *qp)
 *
 * This schedules qp progress and caller should hold
 * the s_lock.
 * @return true if the first leg is scheduled;
 * false if the first leg is not scheduled.
 */
void hfi1_schedule_send(struct rvt_qp *qp)
bool hfi1_schedule_send(struct rvt_qp *qp)
{
	lockdep_assert_held(&qp->s_lock);
	if (hfi1_send_ok(qp))
	if (hfi1_send_ok(qp)) {
		_hfi1_schedule_send(qp);
		return true;
	}
	if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
		iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait,
				IOWAIT_PENDING_IB);
	return false;
}

static void hfi1_qp_schedule(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	bool ret;

	if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_IB)) {
		ret = hfi1_schedule_send(qp);
		if (ret)
			iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
	}
}

void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
@@ -391,16 +417,22 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
	if (qp->s_flags & flag) {
		qp->s_flags &= ~flag;
		trace_hfi1_qpwakeup(qp, flag);
		hfi1_schedule_send(qp);
		hfi1_qp_schedule(qp);
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);
	/* Notify hfi1_destroy_qp() if it is waiting. */
	rvt_put_qp(qp);
}

void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait)
{
	if (iowait_set_work_flag(wait) == IOWAIT_IB_SE)
		qp->s_flags &= ~RVT_S_BUSY;
}

static int iowait_sleep(
	struct sdma_engine *sde,
	struct iowait *wait,
	struct iowait_work *wait,
	struct sdma_txreq *stx,
	uint seq,
	bool pkts_sent)
@@ -441,7 +473,7 @@ static int iowait_sleep(
			rvt_get_qp(qp);
		}
		write_sequnlock(&dev->iowait_lock);
		qp->s_flags &= ~RVT_S_BUSY;
		hfi1_qp_unbusy(qp, wait);
		spin_unlock_irqrestore(&qp->s_lock, flags);
		ret = -EBUSY;
	} else {
@@ -640,6 +672,7 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
		&priv->s_iowait,
		1,
		_hfi1_do_send,
		NULL,
		iowait_sleep,
		iowait_wakeup,
		iowait_sdma_drained);
@@ -689,7 +722,7 @@ void stop_send_queue(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	cancel_work_sync(&priv->s_iowait.iowork);
	iowait_cancel_work(&priv->s_iowait);
}

void quiesce_qp(struct rvt_qp *qp)
+17 −14
Original line number Diff line number Diff line
@@ -57,18 +57,6 @@ extern unsigned int hfi1_qp_table_size;

extern const struct rvt_operation_params hfi1_post_parms[];

/*
 * Send if not busy or waiting for I/O and either
 * a RC response is pending or we can process send work requests.
 */
static inline int hfi1_send_ok(struct rvt_qp *qp)
{
	return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) &&
		(verbs_txreq_queued(qp) ||
		(qp->s_flags & RVT_S_RESP_PENDING) ||
		 !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
}

/*
 * Driver specific s_flags starting at bit 31 down to HFI1_S_MIN_BIT_MASK
 *
@@ -89,6 +77,20 @@ static inline int hfi1_send_ok(struct rvt_qp *qp)
#define HFI1_S_ANY_WAIT_IO (RVT_S_ANY_WAIT_IO | HFI1_S_WAIT_PIO_DRAIN)
#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND)

/*
 * Send if not busy or waiting for I/O and either
 * a RC response is pending or we can process send work requests.
 */
static inline int hfi1_send_ok(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	return !(qp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT_IO)) &&
		(verbs_txreq_queued(iowait_get_ib_work(&priv->s_iowait)) ||
		(qp->s_flags & RVT_S_RESP_PENDING) ||
		 !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
}

/*
 * free_ahg - clear ahg from QP
 */
@@ -129,8 +131,8 @@ struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5);

void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter);

void _hfi1_schedule_send(struct rvt_qp *qp);
void hfi1_schedule_send(struct rvt_qp *qp);
bool _hfi1_schedule_send(struct rvt_qp *qp);
bool hfi1_schedule_send(struct rvt_qp *qp);

void hfi1_migrate_qp(struct rvt_qp *qp);

@@ -150,4 +152,5 @@ void quiesce_qp(struct rvt_qp *qp);
u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu);
int mtu_to_path_mtu(u32 mtu);
void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl);
void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait);
#endif /* _QP_H */
Loading