Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 67afd6d1 authored by Jose Abreu's avatar Jose Abreu Committed by David S. Miller
Browse files

net: stmmac: Add Split Header support and enable it in XGMAC cores



Add the support for Split Header feature in the RX path and enable it in
XGMAC cores.

This neither hurts nor benefits bandwidth, but it does reduce CPU usage:
without the feature the entire packet is memcpy'ed, whereas with the
feature only the header is.

With Split Header disabled 'perf stat -d' gives:
86870.624945 task-clock (msec)      #    0.429 CPUs utilized
     1073352 context-switches       #    0.012 M/sec
           1 cpu-migrations         #    0.000 K/sec
         213 page-faults            #    0.002 K/sec
327113872376 cycles                 #    3.766 GHz (62.53%)
 56618161216 instructions           #    0.17  insn per cycle (75.06%)
 10742205071 branches               #  123.658 M/sec (75.36%)
   584309242 branch-misses          #    5.44% of all branches (75.19%)
 17594787965 L1-dcache-loads        #  202.540 M/sec (74.88%)
  4003773131 L1-dcache-load-misses  #   22.76% of all L1-dcache hits (74.89%)
  1313301468 LLC-loads              #   15.118 M/sec (49.75%)
   355906510 LLC-load-misses        #   27.10% of all LL-cache hits (49.92%)

With Split Header enabled 'perf stat -d' gives:
49324.456539 task-clock (msec)     #    0.245 CPUs utilized
     2542387 context-switches      #    0.052 M/sec
           1 cpu-migrations        #    0.000 K/sec
         213 page-faults           #    0.004 K/sec
177092791469 cycles                #    3.590 GHz (62.30%)
 68555756017 instructions          #    0.39  insn per cycle (75.16%)
 12697019382 branches              #  257.418 M/sec (74.81%)
   442081897 branch-misses         #    3.48% of all branches (74.79%)
 20337958358 L1-dcache-loads       #  412.330 M/sec (75.46%)
  3820210140 L1-dcache-load-misses #   18.78% of all L1-dcache hits (75.35%)
  1257719198 LLC-loads             #   25.499 M/sec (49.73%)
   685543923 LLC-load-misses       #   54.51% of all LL-cache hits (49.86%)

Changes from v2:
	- Reword commit message (Jakub)
Changes from v1:
	- Add performance info (David)
	- Add missing dma_sync_single_for_device()

Signed-off-by: Jose Abreu <joabreu@synopsys.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent c887e02a
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -356,6 +356,7 @@ struct dma_features {
	unsigned int addr64;
	unsigned int rssen;
	unsigned int vlhash;
	unsigned int sphen;
};

/* GMAC TX FIFO is 8K, Rx FIFO is 16K */
+6 −0
Original line number Diff line number Diff line
@@ -32,6 +32,9 @@
#define XGMAC_CONFIG_ARPEN		BIT(31)
#define XGMAC_CONFIG_GPSL		GENMASK(29, 16)
#define XGMAC_CONFIG_GPSL_SHIFT		16
#define XGMAC_CONFIG_HDSMS		GENMASK(14, 12)
#define XGMAC_CONFIG_HDSMS_SHIFT	12
#define XGMAC_CONFIG_HDSMS_256		(0x2 << XGMAC_CONFIG_HDSMS_SHIFT)
#define XGMAC_CONFIG_S2KP		BIT(11)
#define XGMAC_CONFIG_LM			BIT(10)
#define XGMAC_CONFIG_IPC		BIT(9)
@@ -101,6 +104,7 @@
#define XGMAC_HW_FEATURE1		0x00000120
#define XGMAC_HWFEAT_RSSEN		BIT(20)
#define XGMAC_HWFEAT_TSOEN		BIT(18)
#define XGMAC_HWFEAT_SPHEN		BIT(17)
#define XGMAC_HWFEAT_ADDR64		GENMASK(15, 14)
#define XGMAC_HWFEAT_TXFIFOSIZE		GENMASK(10, 6)
#define XGMAC_HWFEAT_RXFIFOSIZE		GENMASK(4, 0)
@@ -258,6 +262,7 @@
#define XGMAC_TCEIE			BIT(0)
#define XGMAC_DMA_ECC_INT_STATUS	0x0000306c
#define XGMAC_DMA_CH_CONTROL(x)		(0x00003100 + (0x80 * (x)))
#define XGMAC_SPH			BIT(24)
#define XGMAC_PBLx8			BIT(16)
#define XGMAC_DMA_CH_TX_CONTROL(x)	(0x00003104 + (0x80 * (x)))
#define XGMAC_TxPBL			GENMASK(21, 16)
@@ -318,6 +323,7 @@
#define XGMAC_TDES3_CIC_SHIFT		16
#define XGMAC_TDES3_TPL			GENMASK(17, 0)
#define XGMAC_TDES3_FL			GENMASK(14, 0)
#define XGMAC_RDES2_HL			GENMASK(9, 0)
#define XGMAC_RDES3_OWN			BIT(31)
#define XGMAC_RDES3_CTXT		BIT(30)
#define XGMAC_RDES3_IOC			BIT(30)
+17 −1
Original line number Diff line number Diff line
@@ -29,6 +29,8 @@ static int dwxgmac2_get_rx_status(void *data, struct stmmac_extra_stats *x,

	if (unlikely(rdes3 & XGMAC_RDES3_OWN))
		return dma_own;
	if (unlikely(rdes3 & XGMAC_RDES3_CTXT))
		return discard_frame;
	if (likely(!(rdes3 & XGMAC_RDES3_LD)))
		return rx_not_ls;
	if (unlikely((rdes3 & XGMAC_RDES3_ES) && (rdes3 & XGMAC_RDES3_LD)))
@@ -54,7 +56,7 @@ static void dwxgmac2_set_tx_owner(struct dma_desc *p)

static void dwxgmac2_set_rx_owner(struct dma_desc *p, int disable_rx_ic)
{
	p->des3 = cpu_to_le32(XGMAC_RDES3_OWN);
	p->des3 |= cpu_to_le32(XGMAC_RDES3_OWN);

	if (!disable_rx_ic)
		p->des3 |= cpu_to_le32(XGMAC_RDES3_IOC);
@@ -284,6 +286,18 @@ static int dwxgmac2_get_rx_hash(struct dma_desc *p, u32 *hash,
	return -EINVAL;
}

/*
 * Fetch the header-length field of an RX descriptor.
 *
 * @p:   descriptor to inspect; only des2 is read.
 * @len: output; receives des2 (converted from little-endian) masked with
 *       XGMAC_RDES2_HL.
 *
 * Always returns 0.
 */
static int dwxgmac2_get_rx_header_len(struct dma_desc *p, unsigned int *len)
{
	unsigned int rdes2 = le32_to_cpu(p->des2);

	/* Keep only the HL bits; everything else in des2 is discarded. */
	*len = rdes2 & XGMAC_RDES2_HL;

	return 0;
}

/*
 * Store a DMA address into words 2 and 3 of a descriptor.
 *
 * @p:    descriptor to update.
 * @addr: address to program (presumably the secondary/payload buffer used
 *        by Split Header - confirm against the caller).
 *
 * Both des2 and des3 are overwritten outright, not OR-ed, so any bits
 * previously held in those words are lost.
 */
static void dwxgmac2_set_sec_addr(struct dma_desc *p, dma_addr_t addr)
{
	unsigned int lo = lower_32_bits(addr);
	unsigned int hi = upper_32_bits(addr);

	/* Low half goes in des2, high half in des3, both little-endian. */
	p->des2 = cpu_to_le32(lo);
	p->des3 = cpu_to_le32(hi);
}

const struct stmmac_desc_ops dwxgmac210_desc_ops = {
	.tx_status = dwxgmac2_get_tx_status,
	.rx_status = dwxgmac2_get_rx_status,
@@ -308,4 +322,6 @@ const struct stmmac_desc_ops dwxgmac210_desc_ops = {
	.set_addr = dwxgmac2_set_addr,
	.clear = dwxgmac2_clear,
	.get_rx_hash = dwxgmac2_get_rx_hash,
	.get_rx_header_len = dwxgmac2_get_rx_header_len,
	.set_sec_addr = dwxgmac2_set_sec_addr,
};
+18 −0
Original line number Diff line number Diff line
@@ -366,6 +366,7 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr,
	hw_cap = readl(ioaddr + XGMAC_HW_FEATURE1);
	dma_cap->rssen = (hw_cap & XGMAC_HWFEAT_RSSEN) >> 20;
	dma_cap->tsoen = (hw_cap & XGMAC_HWFEAT_TSOEN) >> 18;
	dma_cap->sphen = (hw_cap & XGMAC_HWFEAT_SPHEN) >> 17;

	dma_cap->addr64 = (hw_cap & XGMAC_HWFEAT_ADDR64) >> 14;
	switch (dma_cap->addr64) {
@@ -472,6 +473,22 @@ static void dwxgmac2_set_bfsize(void __iomem *ioaddr, int bfsize, u32 chan)
	writel(value, ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan));
}

/*
 * Turn the Split Header (SPH) bit on or off for one DMA channel.
 *
 * @ioaddr: base of the register window.
 * @en:     true sets XGMAC_SPH, false clears it.
 * @chan:   DMA channel whose XGMAC_DMA_CH_CONTROL register is updated.
 *
 * The HDSMS field in XGMAC_RX_CONFIG is rewritten on every call,
 * regardless of @en, before the per-channel bit is touched.
 */
static void dwxgmac2_enable_sph(void __iomem *ioaddr, bool en, u32 chan)
{
	u32 reg;

	/* Segment max 256 bytes */
	reg = readl(ioaddr + XGMAC_RX_CONFIG);
	reg = (reg & ~XGMAC_CONFIG_HDSMS) | XGMAC_CONFIG_HDSMS_256;
	writel(reg, ioaddr + XGMAC_RX_CONFIG);

	/* Start from a cleared SPH bit and set it again only when asked to. */
	reg = readl(ioaddr + XGMAC_DMA_CH_CONTROL(chan)) & ~XGMAC_SPH;
	if (en)
		reg |= XGMAC_SPH;
	writel(reg, ioaddr + XGMAC_DMA_CH_CONTROL(chan));
}

const struct stmmac_dma_ops dwxgmac210_dma_ops = {
	.reset = dwxgmac2_dma_reset,
	.init = dwxgmac2_dma_init,
@@ -498,4 +515,5 @@ const struct stmmac_dma_ops dwxgmac210_dma_ops = {
	.enable_tso = dwxgmac2_enable_tso,
	.qmode = dwxgmac2_qmode,
	.set_bfsize = dwxgmac2_set_bfsize,
	.enable_sph = dwxgmac2_enable_sph,
};
+9 −0
Original line number Diff line number Diff line
@@ -89,6 +89,8 @@ struct stmmac_desc_ops {
	/* RSS */
	int (*get_rx_hash)(struct dma_desc *p, u32 *hash,
			   enum pkt_hash_types *type);
	int (*get_rx_header_len)(struct dma_desc *p, unsigned int *len);
	void (*set_sec_addr)(struct dma_desc *p, dma_addr_t addr);
};

#define stmmac_init_rx_desc(__priv, __args...) \
@@ -141,6 +143,10 @@ struct stmmac_desc_ops {
	stmmac_do_void_callback(__priv, desc, clear, __args)
#define stmmac_get_rx_hash(__priv, __args...) \
	stmmac_do_callback(__priv, desc, get_rx_hash, __args)
#define stmmac_get_rx_header_len(__priv, __args...) \
	stmmac_do_callback(__priv, desc, get_rx_header_len, __args)
#define stmmac_set_desc_sec_addr(__priv, __args...) \
	stmmac_do_void_callback(__priv, desc, set_sec_addr, __args)

struct stmmac_dma_cfg;
struct dma_features;
@@ -191,6 +197,7 @@ struct stmmac_dma_ops {
	void (*enable_tso)(void __iomem *ioaddr, bool en, u32 chan);
	void (*qmode)(void __iomem *ioaddr, u32 channel, u8 qmode);
	void (*set_bfsize)(void __iomem *ioaddr, int bfsize, u32 chan);
	void (*enable_sph)(void __iomem *ioaddr, bool en, u32 chan);
};

#define stmmac_reset(__priv, __args...) \
@@ -247,6 +254,8 @@ struct stmmac_dma_ops {
	stmmac_do_void_callback(__priv, dma, qmode, __args)
#define stmmac_set_dma_bfsize(__priv, __args...) \
	stmmac_do_void_callback(__priv, dma, set_bfsize, __args)
#define stmmac_enable_sph(__priv, __args...) \
	stmmac_do_void_callback(__priv, dma, enable_sph, __args)

struct mac_device_info;
struct net_device;
Loading