Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 14553ca1 authored by Mike Marciniszyn's avatar Mike Marciniszyn Committed by Doug Ledford
Browse files

staging/rdma/hfi1: Adaptive PIO for short messages



The change requires a new pio_busy field in the iowait structure to
track the number of outstanding pios.  The new counter together
with the sdma counter serve as the basis for a packet by packet decision
as to which egress mechanism to use.  Since packets given to different
egress mechanisms are not ordered, this scheme will preserve the order.

The iowait drain/wait mechanisms are extended for a pio case.  An
additional qp wait flag is added for the PIO drain wait case.

Currently the only pio wait is for buffers, so the no_bufs_available()
routine name is changed to pio_wait() and a third argument is passed
with one of the two pio wait flags to generalize the routine.  A module
parameter is added to hold a configurable threshold. For now, the
module parameter is zero.

A heuristic routine is added to return the func pointer of the proper
egress routine to use.

The heuristic is as follows:
- SMI always uses pio
- GSI,UD qps <= threshold use pio
- UD qps > threadhold use sdma
  o No coordination with sdma is required because order is not required
    and this qp pio count is not maintained for UD
- RC/UC ONLY packets <= threshold chose as follows:
  o If sdmas pending, use SDMA
  o Otherwise use pio and enable the pio tracking count at
    the time the pio buffer is allocated
- RC/UC ONLY packets > threshold use SDMA
  o If pio's are pending the pio_wait with the new wait flag is
    called to delay for pios to drain

The threshold is potentially reduced by the QP's mtu.

The sc_buffer_alloc() has two additional args (a callback, a void *)
which are exploited by the RC/UC cases to pass a new complete routine
and a qp *.

When the shadow ring completes the credit associated with a packet,
the new complete routine is called.  The verbs_pio_complete() will then
decrement the busy count and trigger any drain waiters in qp destroy
or reset.

Reviewed-by: default avatarJubin John <jubin.john@intel.com>
Reviewed-by: default avatarDennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: default avatarMike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: default avatarDoug Ledford <dledford@redhat.com>
parent 4f8cc5c0
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -1588,6 +1588,14 @@ static u64 access_sw_pio_wait(const struct cntr_entry *entry,
	return dd->verbs_dev.n_piowait;
}

static u64 access_sw_pio_drain(const struct cntr_entry *entry,
			       void *context, int vl, int mode, u64 data)
{
	struct hfi1_devdata *dd = (struct hfi1_devdata *)context;

	return dd->verbs_dev.n_piodrain;
}

static u64 access_sw_vtx_wait(const struct cntr_entry *entry,
			      void *context, int vl, int mode, u64 data)
{
@@ -4129,6 +4137,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
			    access_sw_vtx_wait),
[C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL,
			    access_sw_pio_wait),
[C_SW_PIO_DRAIN] = CNTR_ELEM("PioDrain", 0, 0, CNTR_NORMAL,
			    access_sw_pio_drain),
[C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL,
			    access_sw_kmem_wait),
[C_SW_SEND_SCHED] = CNTR_ELEM("SendSched", 0, 0, CNTR_NORMAL,
+1 −0
Original line number Diff line number Diff line
@@ -800,6 +800,7 @@ enum {
	C_SW_CPU_RCV_LIM,
	C_SW_VTX_WAIT,
	C_SW_PIO_WAIT,
	C_SW_PIO_DRAIN,
	C_SW_KMEM_WAIT,
	C_SW_SEND_SCHED,
	C_SDMA_DESC_FETCHED_CNT,
+3 −4
Original line number Diff line number Diff line
@@ -811,6 +811,7 @@ struct sdma_vl_map;
#define BOARD_VERS_MAX 96 /* how long the version string can be */
#define SERIAL_MAX 16 /* length of the serial number */

typedef int (*send_routine)(struct rvt_qp *, struct hfi1_pkt_state *, u64);
struct hfi1_devdata {
	struct hfi1_ibdev verbs_dev;     /* must be first */
	struct list_head list;
@@ -1121,10 +1122,8 @@ struct hfi1_devdata {
	 * Handlers for outgoing data so that snoop/capture does not
	 * have to have its hooks in the send path
	 */
	int (*process_pio_send)(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
				u64 pbc);
	int (*process_dma_send)(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
				u64 pbc);
	send_routine process_pio_send;
	send_routine process_dma_send;
	void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
				u64 pbc, const void *from, size_t count);

+89 −0
Original line number Diff line number Diff line
@@ -55,6 +55,7 @@
#include <linux/sched.h>

#include "sdma_txreq.h"

/*
 * typedef (*restart_t)() - restart callback
 * @work: pointer to work structure
@@ -71,6 +72,7 @@ struct sdma_engine;
 * @wakeup: space callback
 * @iowork: workqueue overhead
 * @wait_dma: wait for sdma_busy == 0
 * @wait_pio: wait for pio_busy == 0
 * @sdma_busy: # of packets in flight
 * @count: total number of descriptors in tx_head'ed list
 * @tx_limit: limit for overflow queuing
@@ -104,7 +106,9 @@ struct iowait {
	void (*wakeup)(struct iowait *wait, int reason);
	struct work_struct iowork;
	wait_queue_head_t wait_dma;
	wait_queue_head_t wait_pio;
	atomic_t sdma_busy;
	atomic_t pio_busy;
	u32 count;
	u32 tx_limit;
	u32 tx_count;
@@ -141,7 +145,9 @@ static inline void iowait_init(
	INIT_LIST_HEAD(&wait->tx_head);
	INIT_WORK(&wait->iowork, func);
	init_waitqueue_head(&wait->wait_dma);
	init_waitqueue_head(&wait->wait_pio);
	atomic_set(&wait->sdma_busy, 0);
	atomic_set(&wait->pio_busy, 0);
	wait->tx_limit = tx_limit;
	wait->sleep = sleep;
	wait->wakeup = wakeup;
@@ -174,6 +180,88 @@ static inline void iowait_sdma_drain(struct iowait *wait)
	wait_event(wait->wait_dma, !atomic_read(&wait->sdma_busy));
}

/**
 * iowait_sdma_pending() - return sdma pending count
 *
 * @wait: iowait structure
 *
 */
static inline int iowait_sdma_pending(struct iowait *wait)
{
	return atomic_read(&wait->sdma_busy);
}

/**
 * iowait_sdma_inc - note sdma io pending
 * @wait: iowait structure
 */
static inline void iowait_sdma_inc(struct iowait *wait)
{
	atomic_inc(&wait->sdma_busy);
}

/**
 * iowait_sdma_add - add count to pending
 * @wait: iowait structure
 */
static inline void iowait_sdma_add(struct iowait *wait, int count)
{
	atomic_add(count, &wait->sdma_busy);
}

/**
 * iowait_sdma_dec - note sdma complete
 * @wait: iowait structure
 */
static inline int iowait_sdma_dec(struct iowait *wait)
{
	return atomic_dec_and_test(&wait->sdma_busy);
}

/**
 * iowait_pio_drain() - wait for pios to drain
 *
 * @wait: iowait structure
 *
 * This will delay until the iowait pios have
 * completed.
 */
static inline void iowait_pio_drain(struct iowait *wait)
{
	wait_event_timeout(wait->wait_pio,
			   !atomic_read(&wait->pio_busy),
			   HZ);
}

/**
 * iowait_pio_pending() - return pio pending count
 *
 * @wait: iowait structure
 *
 */
static inline int iowait_pio_pending(struct iowait *wait)
{
	return atomic_read(&wait->pio_busy);
}

/**
 * iowait_pio_inc - note pio pending
 * @wait: iowait structure
 */
static inline void iowait_pio_inc(struct iowait *wait)
{
	atomic_inc(&wait->pio_busy);
}

/**
 * iowait_sdma_dec - note pio complete
 * @wait: iowait structure
 */
static inline int iowait_pio_dec(struct iowait *wait)
{
	return atomic_dec_and_test(&wait->pio_busy);
}

/**
 * iowait_drain_wakeup() - trigger iowait_drain() waiter
 *
@@ -184,6 +272,7 @@ static inline void iowait_sdma_drain(struct iowait *wait)
static inline void iowait_drain_wakeup(struct iowait *wait)
{
	wake_up(&wait->wait_dma);
	wake_up(&wait->wait_pio);
}

/**
+2 −1
Original line number Diff line number Diff line
@@ -1564,7 +1564,8 @@ static void sc_piobufavail(struct send_context *sc)
	write_sequnlock_irqrestore(&dev->iowait_lock, flags);

	for (i = 0; i < n; i++)
		hfi1_qp_wakeup(qps[i], RVT_S_WAIT_PIO);
		hfi1_qp_wakeup(qps[i],
			       RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN);
}

/* translate a send credit update to a bit code of reasons */
Loading