Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a92ac140 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'cxgb4-speed-up-reading-on-chip-memory'



Rahul Lakkireddy says:

====================
cxgb4: speed up reading on-chip memory

This series of patches speed up reading on-chip memory (EDC and MC)
by reading 64-bits at a time.

Patch 1 reworks logic to read EDC and MC.

Patch 2 adds logic to read EDC and MC 64-bits at a time.

v2:
- Dropped AVX CPU intrinsic instructions.
- Use readq() to read 64-bits at a time.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 9942895b 7494f980
Loading
Loading
Loading
Loading
+82 −4
Original line number Diff line number Diff line
@@ -878,6 +878,86 @@ static int cudbg_get_payload_range(struct adapter *padap, u8 mem_type,
				      &payload->start, &payload->end);
}

static int cudbg_memory_read(struct cudbg_init *pdbg_init, int win,
			     int mtype, u32 addr, u32 len, void *hbuf)
{
	u32 win_pf, memoffset, mem_aperture, mem_base;
	struct adapter *adap = pdbg_init->adap;
	u32 pos, offset, resid;
	u32 *res_buf;
	u64 *buf;
	int ret;

	/* Argument sanity checks ...
	 */
	if (addr & 0x3 || (uintptr_t)hbuf & 0x3)
		return -EINVAL;

	buf = (u64 *)hbuf;

	/* Try to do 64-bit reads.  Residual will be handled later. */
	resid = len & 0x7;
	len -= resid;

	ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base,
				&mem_aperture);
	if (ret)
		return ret;

	addr = addr + memoffset;
	win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf);

	pos = addr & ~(mem_aperture - 1);
	offset = addr - pos;

	/* Set up initial PCI-E Memory Window to cover the start of our
	 * transfer.
	 */
	t4_memory_update_win(adap, win, pos | win_pf);

	/* Transfer data from the adapter */
	while (len > 0) {
		*buf++ = le64_to_cpu((__force __le64)
				     t4_read_reg64(adap, mem_base + offset));
		offset += sizeof(u64);
		len -= sizeof(u64);

		/* If we've reached the end of our current window aperture,
		 * move the PCI-E Memory Window on to the next.
		 */
		if (offset == mem_aperture) {
			pos += mem_aperture;
			offset = 0;
			t4_memory_update_win(adap, win, pos | win_pf);
		}
	}

	res_buf = (u32 *)buf;
	/* Read residual in 32-bit multiples */
	while (resid > sizeof(u32)) {
		*res_buf++ = le32_to_cpu((__force __le32)
					 t4_read_reg(adap, mem_base + offset));
		offset += sizeof(u32);
		resid -= sizeof(u32);

		/* If we've reached the end of our current window aperture,
		 * move the PCI-E Memory Window on to the next.
		 */
		if (offset == mem_aperture) {
			pos += mem_aperture;
			offset = 0;
			t4_memory_update_win(adap, win, pos | win_pf);
		}
	}

	/* Transfer residual < 32-bits */
	if (resid)
		t4_memory_rw_residual(adap, resid, mem_base + offset,
				      (u8 *)res_buf, T4_MEMORY_READ);

	return 0;
}

#define CUDBG_YIELD_ITERATION 256

static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
@@ -937,10 +1017,8 @@ static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
				goto skip_read;

		spin_lock(&padap->win0_lock);
		rc = t4_memory_rw(padap, MEMWIN_NIC, mem_type,
				  bytes_read, bytes,
				  (__be32 *)temp_buff.data,
				  1);
		rc = cudbg_memory_read(pdbg_init, MEMWIN_NIC, mem_type,
				       bytes_read, bytes, temp_buff.data);
		spin_unlock(&padap->win0_lock);
		if (rc) {
			cudbg_err->sys_err = rc;
+5 −0
Original line number Diff line number Diff line
@@ -1488,6 +1488,11 @@ u32 t4_read_pcie_cfg4(struct adapter *adap, int reg);
u32 t4_get_util_window(struct adapter *adap);
void t4_setup_memwin(struct adapter *adap, u32 memwin_base, u32 window);

int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off,
		      u32 *mem_base, u32 *mem_aperture);
void t4_memory_update_win(struct adapter *adap, int win, u32 addr);
void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf,
			   int dir);
#define T4_MEMORY_WRITE	0
#define T4_MEMORY_READ	1
int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, u32 len,
+124 −69
Original line number Diff line number Diff line
@@ -483,6 +483,117 @@ static int t4_edc_err_read(struct adapter *adap, int idx)
	return 0;
}

/**
 * t4_memory_rw_init - Get memory window relative offset, base, and size.
 * @adap: the adapter
 * @win: PCI-E Memory Window to use
 * @mtype: memory type: MEM_EDC0, MEM_EDC1 or MEM_MC
 * @mem_off: memory relative offset with respect to @mtype.
 * @mem_base: configured memory base address.
 * @mem_aperture: configured memory window aperture.
 *
 * Get the configured memory window's relative offset, base, and size.
 */
int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off,
		      u32 *mem_base, u32 *mem_aperture)
{
	u32 edc_size, mc_size, mem_reg;

	/* Offset into the region of memory which is being accessed
	 * MEM_EDC0 = 0
	 * MEM_EDC1 = 1
	 * MEM_MC   = 2 -- MEM_MC for chips with only 1 memory controller
	 * MEM_MC1  = 3 -- for chips with 2 memory controllers (e.g. T5)
	 * MEM_HMA  = 4
	 */
	edc_size  = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A));
	if (mtype == MEM_HMA) {
		*mem_off = 2 * (edc_size * 1024 * 1024);
	} else if (mtype != MEM_MC1) {
		*mem_off = (mtype * (edc_size * 1024 * 1024));
	} else {
		mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap,
						      MA_EXT_MEMORY0_BAR_A));
		*mem_off = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024;
	}

	/* Each PCI-E Memory Window is programmed with a window size -- or
	 * "aperture" -- which controls the granularity of its mapping onto
	 * adapter memory.  We need to grab that aperture in order to know
	 * how to use the specified window.  The window is also programmed
	 * with the base address of the Memory Window in BAR0's address
	 * space.  For T4 this is an absolute PCI-E Bus Address.  For T5
	 * the address is relative to BAR0.
	 */
	mem_reg = t4_read_reg(adap,
			      PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A,
						  win));
	/* a dead adapter will return 0xffffffff for PIO reads */
	if (mem_reg == 0xffffffff)
		return -ENXIO;

	*mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X);
	*mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X;
	if (is_t4(adap->params.chip))
		*mem_base -= adap->t4_bar0;

	return 0;
}

/**
 * t4_memory_update_win - Move memory window to specified address.
 * @adap: the adapter
 * @win: PCI-E Memory Window to use
 * @addr: location to move.
 *
 * Move memory window to specified address.
 */
void t4_memory_update_win(struct adapter *adap, int win, u32 addr)
{
	t4_write_reg(adap,
		     PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win),
		     addr);
	/* Read it back to ensure that changes propagate before we
	 * attempt to use the new value.
	 */
	t4_read_reg(adap,
		    PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win));
}

/**
 * t4_memory_rw_residual - Read/Write residual data.
 * @adap: the adapter
 * @off: relative offset within residual to start read/write.
 * @addr: address within indicated memory type.
 * @buf: host memory buffer
 * @dir: direction of transfer T4_MEMORY_READ (1) or T4_MEMORY_WRITE (0)
 *
 * Read/Write residual data less than 32-bits.
 */
void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf,
			   int dir)
{
	union {
		u32 word;
		char byte[4];
	} last;
	unsigned char *bp;
	int i;

	if (dir == T4_MEMORY_READ) {
		last.word = le32_to_cpu((__force __le32)
					t4_read_reg(adap, addr));
		for (bp = (unsigned char *)buf, i = off; i < 4; i++)
			bp[i] = last.byte[i];
	} else {
		last.word = *buf;
		for (i = off; i < 4; i++)
			last.byte[i] = 0;
		t4_write_reg(adap, addr,
			     (__force u32)cpu_to_le32(last.word));
	}
}

/**
 *	t4_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window
 *	@adap: the adapter
@@ -504,8 +615,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
		 u32 len, void *hbuf, int dir)
{
	u32 pos, offset, resid, memoffset;
	u32 edc_size, mc_size, win_pf, mem_reg, mem_aperture, mem_base;
	u32 win_pf, mem_aperture, mem_base;
	u32 *buf;
	int ret;

	/* Argument sanity checks ...
	 */
@@ -521,42 +633,14 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
	resid = len & 0x3;
	len -= resid;

	/* Offset into the region of memory which is being accessed
	 * MEM_EDC0 = 0
	 * MEM_EDC1 = 1
	 * MEM_MC   = 2 -- MEM_MC for chips with only 1 memory controller
	 * MEM_MC1  = 3 -- for chips with 2 memory controllers (e.g. T5)
	 * MEM_HMA  = 4
	 */
	edc_size  = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A));
	if (mtype == MEM_HMA) {
		memoffset = 2 * (edc_size * 1024 * 1024);
	} else if (mtype != MEM_MC1) {
		memoffset = (mtype * (edc_size * 1024 * 1024));
	} else {
		mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap,
						      MA_EXT_MEMORY0_BAR_A));
		memoffset = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024;
	}
	ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base,
				&mem_aperture);
	if (ret)
		return ret;

	/* Determine the PCIE_MEM_ACCESS_OFFSET */
	addr = addr + memoffset;

	/* Each PCI-E Memory Window is programmed with a window size -- or
	 * "aperture" -- which controls the granularity of its mapping onto
	 * adapter memory.  We need to grab that aperture in order to know
	 * how to use the specified window.  The window is also programmed
	 * with the base address of the Memory Window in BAR0's address
	 * space.  For T4 this is an absolute PCI-E Bus Address.  For T5
	 * the address is relative to BAR0.
	 */
	mem_reg = t4_read_reg(adap,
			      PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A,
						  win));
	mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X);
	mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X;
	if (is_t4(adap->params.chip))
		mem_base -= adap->t4_bar0;
	win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf);

	/* Calculate our initial PCI-E Memory Window Position and Offset into
@@ -566,14 +650,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
	offset = addr - pos;

	/* Set up initial PCI-E Memory Window to cover the start of our
	 * transfer.  (Read it back to ensure that changes propagate before we
	 * attempt to use the new value.)
	 * transfer.
	 */
	t4_write_reg(adap,
		     PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win),
		     pos | win_pf);
	t4_read_reg(adap,
		    PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win));
	t4_memory_update_win(adap, win, pos | win_pf);

	/* Transfer data to/from the adapter as long as there's an integral
	 * number of 32-bit transfers to complete.
@@ -628,12 +707,7 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
		if (offset == mem_aperture) {
			pos += mem_aperture;
			offset = 0;
			t4_write_reg(adap,
				PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A,
						    win), pos | win_pf);
			t4_read_reg(adap,
				PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A,
						    win));
			t4_memory_update_win(adap, win, pos | win_pf);
		}
	}

@@ -642,28 +716,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
	 * residual amount.  The PCI-E Memory Window has already been moved
	 * above (if necessary) to cover this final transfer.
	 */
	if (resid) {
		union {
			u32 word;
			char byte[4];
		} last;
		unsigned char *bp;
		int i;

		if (dir == T4_MEMORY_READ) {
			last.word = le32_to_cpu(
					(__force __le32)t4_read_reg(adap,
						mem_base + offset));
			for (bp = (unsigned char *)buf, i = resid; i < 4; i++)
				bp[i] = last.byte[i];
		} else {
			last.word = *buf;
			for (i = resid; i < 4; i++)
				last.byte[i] = 0;
			t4_write_reg(adap, mem_base + offset,
				     (__force u32)cpu_to_le32(last.word));
		}
	}
	if (resid)
		t4_memory_rw_residual(adap, resid, mem_base + offset,
				      (u8 *)buf, dir);

	return 0;
}