Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8af8d297 authored by Sebastian Sanchez, committed by Doug Ledford
Browse files

IB/hfi1: Optimize pio_buf and send_context structs



Both pio_buf and send_context structs have oversized
fields and have cachelines that can be optimized.

Reduce oversized fields for both structs.
Make sure pio_buf struct fits within a cacheline.
Move read-only fields to their own cacheline in
send_context struct.

All of this will avoid cacheline trading as the ring
progresses and pio buffers/send contexts are used.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent 2474d775
Loading
Loading
Loading
Loading
+2 −3
Original line number Diff line number Diff line
@@ -765,6 +765,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
	sc->hw_context = hw_context;
	cr_group_addresses(sc, &dma);
	sc->credits = sci->credits;
	sc->size = sc->credits * PIO_BLOCK_SIZE;

/* PIO Send Memory Address details */
#define PIO_ADDR_CONTEXT_MASK 0xfful
@@ -1470,9 +1471,7 @@ struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len,

	/* finish filling in the buffer outside the lock */
	pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE;
	pbuf->size = sc->credits * PIO_BLOCK_SIZE;
	pbuf->end = sc->base_addr + pbuf->size;
	pbuf->block_count = blocks;
	pbuf->end = sc->base_addr + sc->size;
	pbuf->qw_written = 0;
	pbuf->carry_bytes = 0;
	pbuf->carry.val64 = 0;
+15 −14
Original line number Diff line number Diff line
@@ -83,43 +83,43 @@ struct pio_buf {
	void *arg;		/* argument for cb */
	void __iomem *start;	/* buffer start address */
	void __iomem *end;	/* context end address */
	unsigned long size;	/* context size, in bytes */
	unsigned long sent_at;	/* buffer is sent when <= free */
	u32 block_count;	/* size of buffer, in blocks */
	u32 qw_written;		/* QW written so far */
	u32 carry_bytes;	/* number of valid bytes in carry */
	union mix carry;	/* pending unwritten bytes */
	u16 qw_written;		/* QW written so far */
	u8 carry_bytes;	/* number of valid bytes in carry */
};

/*
 * cache line aligned pio buffer array
 *
 * One shadow ring entry (send_context.sr points at these).  The pio_buf
 * is overlaid with a spacer of 16 u64 words, so each entry occupies at
 * least the size of that spacer no matter how small struct pio_buf is.
 * NOTE(review): presumably the spacer is sized so that neighbouring ring
 * entries never share a cache line - confirm against the target
 * cache line size.
 */
union pio_shadow_ring {
	struct pio_buf pbuf;
	u64 unused[16];		/* cache line spacer */
} ____cacheline_aligned;

/* per-NUMA send context */
struct send_context {
	/* read-only after init */
	struct hfi1_devdata *dd;		/* device */
	void __iomem *base_addr;	/* start of PIO memory */
	union pio_shadow_ring *sr;	/* shadow ring */
	void __iomem *base_addr;	/* start of PIO memory */
	u32 __percpu *buffers_allocated;/* count of buffers allocated */
	u32 size;			/* context size, in bytes */

	struct work_struct halt_work;	/* halted context work queue entry */
	unsigned long flags;		/* flags */
	int node;			/* context home node */
	int type;			/* context type */
	u32 sw_index;			/* software index number */
	u32 hw_context;			/* hardware context number */
	u32 credits;			/* number of blocks in context */
	u32 sr_size;			/* size of the shadow ring */
	u32 group;			/* credit return group */
	u16 flags;			/* flags */
	u8  type;			/* context type */
	u8  sw_index;			/* software index number */
	u8  hw_context;			/* hardware context number */
	u8  group;			/* credit return group */

	/* allocator fields */
	spinlock_t alloc_lock ____cacheline_aligned_in_smp;
	u32 sr_head;			/* shadow ring head */
	unsigned long fill;		/* official alloc count */
	unsigned long alloc_free;	/* copy of free (less cache thrash) */
	u32 __percpu *buffers_allocated;/* count of buffers allocated */
	u32 fill_wrap;			/* tracks fill within ring */
	u32 credits;			/* number of blocks in context */
	/* adding a new field here would make it part of this cacheline */

	/* releaser fields */
	spinlock_t release_lock ____cacheline_aligned_in_smp;
	u32 sr_tail;			/* shadow ring tail */
@@ -131,6 +131,7 @@ struct send_context {
	u32 credit_intr_count;		/* count of credit intr users */
	u64 credit_ctrl;		/* cache for credit control */
	wait_queue_head_t halt_wait;    /* wait until kernel sees interrupt */
	struct work_struct halt_work;	/* halted context work queue entry */
};

/* send context flags */
+11 −11
Original line number Diff line number Diff line
@@ -129,8 +129,8 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
				dest += sizeof(u64);
			}

			dest -= pbuf->size;
			dend -= pbuf->size;
			dest -= pbuf->sc->size;
			dend -= pbuf->sc->size;
		}

		/* write 8-byte non-SOP, non-wrap chunk data */
@@ -361,8 +361,8 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
				dest += sizeof(u64);
			}

			dest -= pbuf->size;
			dend -= pbuf->size;
			dest -= pbuf->sc->size;
			dend -= pbuf->sc->size;
		}

		/* write 8-byte non-SOP, non-wrap chunk data */
@@ -458,8 +458,8 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
			dest += sizeof(u64);
		}

		dest -= pbuf->size;
		dend -= pbuf->size;
		dest -= pbuf->sc->size;
		dend -= pbuf->sc->size;
	}

	/* write 8-byte non-SOP, non-wrap chunk data */
@@ -492,7 +492,7 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
		 */
		/* adjust if we have wrapped */
		if (dest >= pbuf->end)
			dest -= pbuf->size;
			dest -= pbuf->sc->size;
		/* jump to the SOP range if within the first block */
		else if (pbuf->qw_written < PIO_BLOCK_QWS)
			dest += SOP_DISTANCE;
@@ -584,8 +584,8 @@ static void mid_copy_straight(struct pio_buf *pbuf,
			dest += sizeof(u64);
		}

		dest -= pbuf->size;
		dend -= pbuf->size;
		dest -= pbuf->sc->size;
		dend -= pbuf->sc->size;
	}

	/* write 8-byte non-SOP, non-wrap chunk data */
@@ -666,7 +666,7 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
			 */
			/* adjust if we've wrapped */
			if (dest >= pbuf->end)
				dest -= pbuf->size;
				dest -= pbuf->sc->size;
			/* jump to SOP range if within the first block */
			else if (pbuf->qw_written < PIO_BLOCK_QWS)
				dest += SOP_DISTANCE;
@@ -719,7 +719,7 @@ void seg_pio_copy_end(struct pio_buf *pbuf)
	 */
	/* adjust if we have wrapped */
	if (dest >= pbuf->end)
		dest -= pbuf->size;
		dest -= pbuf->sc->size;
	/* jump to the SOP range if within the first block */
	else if (pbuf->qw_written < PIO_BLOCK_QWS)
		dest += SOP_DISTANCE;