
Commit 58310b3f authored by David S. Miller

Merge branch 'mlx4-next'



Or Gerlitz says:

====================
mlx4: CQE/EQE stride support

This series from Ido Shamay is intended for archs whose
cache line is larger than 64 bytes.

Since our CQE/EQEs are generally 64B in those systems, HW will write
twice to the same cache line consecutively, causing pipe locks due to
the hazard prevention mechanism. For elements in a cyclic buffer, writes
are consecutive, so entries smaller than a cache line should be
avoided, especially if they are written at a high rate.

Reduce consecutive writes to same cache line in CQs/EQs, by allowing the
driver to increase the distance between entries so that each will reside
in a different cache line.
====================
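To make the effect concrete: on a 128B cache line, two adjacent 64B entries share one line, so back-to-back HW writes to entries N and N+1 hit the same line and stall. A minimal standalone sketch of the sizing rule this series applies (entry_stride() is a hypothetical name for illustration, not a driver function):

	/* Pick an entry stride of one full cache line, clamped to the
	 * 64B/128B/256B strides the HW supports. Hypothetical helper,
	 * for illustration only. */
	static inline int entry_stride(int cache_line)
	{
		if (cache_line <= 64)
			return 64;	/* default: one 64B entry per line */
		return cache_line <= 128 ? 128 : 256;
	}

With entry_stride(128) == 128, entries N and N+1 land in different cache lines and the consecutive-write hazard disappears.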

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 54003f11 b1b6b4da
+1 −0
@@ -2459,6 +2459,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 	}
 	priv->rx_ring_num = prof->rx_ring_num;
 	priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 1 : 0;
+	priv->cqe_size = mdev->dev->caps.cqe_size;
 	priv->mac_index = -1;
 	priv->msg_enable = MLX4_EN_MSG_LEVEL;
 	spin_lock_init(&priv->stats_lock);
+2 −2
@@ -671,7 +671,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 	 * descriptor offset can be deduced from the CQE index instead of
 	 * reading 'cqe->index' */
 	index = cq->mcq.cons_index & ring->size_mask;
-	cqe = &cq->buf[(index << factor) + factor];
+	cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor;

 	/* Process all completed CQEs */
 	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
@@ -858,7 +858,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud

 		++cq->mcq.cons_index;
 		index = (cq->mcq.cons_index) & ring->size_mask;
-		cqe = &cq->buf[(index << factor) + factor];
+		cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor;
 		if (++polled == budget)
 			goto out;
 	}
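The mlx4_en_get_cqe() helper these RX/TX hunks call is introduced elsewhere in the series; its definition is not in the hunks shown here. From the call sites it plausibly reduces to a byte-offset computation; a sketch, assuming the GCC void-pointer arithmetic used throughout the kernel:

	/* Sketch of the helper as implied by the call sites: step into
	 * the CQ buffer by index * cqe_size bytes. Callers then add
	 * 'factor' (in units of the 32B struct mlx4_cqe) to skip the
	 * reserved first half of a 64B CQE. */
	static inline struct mlx4_cqe *mlx4_en_get_cqe(void *buf, int idx, int cqe_sz)
	{
		return buf + idx * cqe_sz;
	}

Note that the old &cq->buf[(index << factor) + factor] indexing hard-wired the 32B/64B cases, while the new form works for any stride recorded in priv->cqe_size.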
+2 −2
@@ -382,7 +382,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
 		return true;

 	index = cons_index & size_mask;
-	cqe = &buf[(index << factor) + factor];
+	cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
 	ring_index = ring->cons & size_mask;
 	stamp_index = ring_index;

@@ -430,7 +430,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,

 		++cons_index;
 		index = cons_index & size_mask;
-		cqe = &buf[(index << factor) + factor];
+		cqe = mlx4_en_get_cqe(buf, index, priv->cqe_size) + factor;
 	}


+19 −11
@@ -101,21 +101,24 @@ static void eq_set_ci(struct mlx4_eq *eq, int req_not)
 	mb();
 }

-static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor)
+static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor,
+				u8 eqe_size)
 {
 	/* (entry & (eq->nent - 1)) gives us a cyclic array */
-	unsigned long offset = (entry & (eq->nent - 1)) * (MLX4_EQ_ENTRY_SIZE << eqe_factor);
-	/* CX3 is capable of extending the EQE from 32 to 64 bytes.
-	 * When this feature is enabled, the first (in the lower addresses)
+	unsigned long offset = (entry & (eq->nent - 1)) * eqe_size;
+	/* CX3 is capable of extending the EQE from 32 to 64 bytes with
+	 * strides of 64B,128B and 256B.
+	 * When 64B EQE is used, the first (in the lower addresses)
 	 * 32 bytes in the 64 byte EQE are reserved and the next 32 bytes
 	 * contain the legacy EQE information.
+	 * In all other cases, the first 32B contains the legacy EQE info.
 	 */
 	return eq->page_list[offset / PAGE_SIZE].buf + (offset + (eqe_factor ? MLX4_EQ_ENTRY_SIZE : 0)) % PAGE_SIZE;
 }

-static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq, u8 eqe_factor)
+static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq, u8 eqe_factor, u8 size)
 {
-	struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index, eqe_factor);
+	struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index, eqe_factor, size);
 	return !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? NULL : eqe;
 }
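The offset math in get_eqe() deserves a worked example. A standalone check with hypothetical values (eqe_size = 128 for a 128B-cache-line arch, nent = 512; nent is a power of two, as the allocation hunk below enforces):

	#include <assert.h>

	int main(void)
	{
		unsigned int nent = 512;
		unsigned int eqe_size = 128;
		unsigned long offset = (515 & (nent - 1)) * eqe_size;

		assert(offset == 3 * 128);	/* entry 515 wraps to slot 3 */
		return 0;
	}

With eqe_factor set (plain 64B EQE mode), the returned pointer is additionally advanced by MLX4_EQ_ENTRY_SIZE (32B) so it lands on the legacy EQE info in the upper half; in stride mode eqe_factor is forced to 0, as the INIT_HCA hunk further down shows.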

@@ -459,8 +462,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
 	enum slave_port_gen_event gen_event;
 	unsigned long flags;
 	struct mlx4_vport_state *s_info;
+	int eqe_size = dev->caps.eqe_size;

-	while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor))) {
+	while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor, eqe_size))) {
 		/*
 		 * Make sure we read EQ entry contents after we've
 		 * checked the ownership bit.
@@ -894,8 +898,10 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent,

 	eq->dev   = dev;
 	eq->nent  = roundup_pow_of_two(max(nent, 2));
-	/* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
-	npages = PAGE_ALIGN(eq->nent * (MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor)) / PAGE_SIZE;
+	/* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes, with
+	 * strides of 64B,128B and 256B.
+	 */
+	npages = PAGE_ALIGN(eq->nent * dev->caps.eqe_size) / PAGE_SIZE;

 	eq->page_list = kmalloc(npages * sizeof *eq->page_list,
 				GFP_KERNEL);
@@ -997,8 +1003,10 @@ static void mlx4_free_eq(struct mlx4_dev *dev,
 	struct mlx4_cmd_mailbox *mailbox;
 	int err;
 	int i;
-	/* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
-	int npages = PAGE_ALIGN((MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor) * eq->nent) / PAGE_SIZE;
+	/* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes, with
+	 * strides of 64B,128B and 256B
+	 */
+	int npages = PAGE_ALIGN(dev->caps.eqe_size  * eq->nent) / PAGE_SIZE;

 	mailbox = mlx4_alloc_cmd_mailbox(dev);
 	if (IS_ERR(mailbox))
+36 −2
@@ -137,7 +137,9 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
 		[8] = "Dynamic QP updates support",
 		[9] = "Device managed flow steering IPoIB support",
 		[10] = "TCP/IP offloads/flow-steering for VXLAN support",
-		[11] = "MAD DEMUX (Secure-Host) support"
+		[11] = "MAD DEMUX (Secure-Host) support",
+		[12] = "Large cache line (>64B) CQE stride support",
+		[13] = "Large cache line (>64B) EQE stride support"
 	};
 	int i;

@@ -557,6 +559,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET	0x74
 #define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET	0x76
 #define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET	0x77
+#define QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE	0x7a
 #define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET	0x80
 #define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET	0x82
 #define QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET	0x84
@@ -733,6 +736,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev_cap->max_rq_sg = field;
 	MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET);
 	dev_cap->max_rq_desc_sz = size;
+	MLX4_GET(field, outbox, QUERY_DEV_CAP_CQ_EQ_CACHE_LINE_STRIDE);
+	if (field & (1 << 6))
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
+	if (field & (1 << 7))
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE;

 	MLX4_GET(dev_cap->bmme_flags, outbox,
 		 QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
@@ -1376,6 +1384,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 #define	 INIT_HCA_CQC_BASE_OFFSET	 (INIT_HCA_QPC_OFFSET + 0x30)
 #define	 INIT_HCA_LOG_CQ_OFFSET		 (INIT_HCA_QPC_OFFSET + 0x37)
 #define	 INIT_HCA_EQE_CQE_OFFSETS	 (INIT_HCA_QPC_OFFSET + 0x38)
+#define	 INIT_HCA_EQE_CQE_STRIDE_OFFSET  (INIT_HCA_QPC_OFFSET + 0x3b)
 #define	 INIT_HCA_ALTC_BASE_OFFSET	 (INIT_HCA_QPC_OFFSET + 0x40)
 #define	 INIT_HCA_AUXC_BASE_OFFSET	 (INIT_HCA_QPC_OFFSET + 0x50)
 #define	 INIT_HCA_EQC_BASE_OFFSET	 (INIT_HCA_QPC_OFFSET + 0x60)
@@ -1452,11 +1461,25 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_CQE) {
 		*(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 30);
 		dev->caps.cqe_size   = 64;
-		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
+		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
 	} else {
 		dev->caps.cqe_size   = 32;
 	}

+	/* CX3 is capable of extending CQEs\EQEs to strides larger than 64B */
+	if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) &&
+	    (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE)) {
+		dev->caps.eqe_size = cache_line_size();
+		dev->caps.cqe_size = cache_line_size();
+		dev->caps.eqe_factor = 0;
+		MLX4_PUT(inbox, (u8)((ilog2(dev->caps.eqe_size) - 5) << 4 |
+				      (ilog2(dev->caps.eqe_size) - 5)),
+			 INIT_HCA_EQE_CQE_STRIDE_OFFSET);
+
+		/* User still need to know to support CQE > 32B */
+		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
+	}
+
 	/* QPC/EEC/CQC/EQC/RDMARC attributes */

 	MLX4_PUT(inbox, param->qpc_base,      INIT_HCA_QPC_BASE_OFFSET);
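The byte written at INIT_HCA_EQE_CQE_STRIDE_OFFSET packs two exponents: the EQE stride in the high nibble and the CQE stride in the low nibble, each encoded as ilog2(size) - 5 (so 32B -> 0, 64B -> 1, 128B -> 2, 256B -> 3). A standalone check of the encoding, with a local stand-in for the kernel's ilog2():

	#include <assert.h>

	static int ilog2_u32(unsigned int v)	/* stand-in for the kernel's ilog2() */
	{
		int l = 0;
		while (v >>= 1)
			l++;
		return l;
	}

	int main(void)
	{
		unsigned int size = 128;	/* cache_line_size() on, e.g., a POWER box */
		unsigned char b = (ilog2_u32(size) - 5) << 4 | (ilog2_u32(size) - 5);

		assert(b == 0x22);		/* a 256B line would encode as 0x33 */
		return 0;
	}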
@@ -1616,6 +1639,17 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
 	if (byte_field & 0x40) /* 64-bytes cqe enabled */
 		param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;

+	/* CX3 is capable of extending CQEs\EQEs to strides larger than 64B */
+	MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_STRIDE_OFFSET);
+	if (byte_field) {
+		param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED;
+		param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;
+		param->cqe_size = 1 << ((byte_field &
+					 MLX4_CQE_SIZE_MASK_STRIDE) + 5);
+		param->eqe_size = 1 << (((byte_field &
+					  MLX4_EQE_SIZE_MASK_STRIDE) >> 4) + 5);
+	}
+
 	/* TPT attributes */

 	MLX4_GET(param->dmpt_base,  outbox, INIT_HCA_DMPT_BASE_OFFSET);
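QUERY_HCA reverses the INIT_HCA encoding: assuming MLX4_CQE_SIZE_MASK_STRIDE selects the low nibble and MLX4_EQE_SIZE_MASK_STRIDE the high one (their definitions are outside these hunks), each size is recovered as 1 << (nibble + 5). A standalone check of the decode:

	#include <assert.h>

	int main(void)
	{
		unsigned char byte_field = 0x22;	/* as written by INIT_HCA above */

		/* 0x0f / 0xf0 stand in for the stride masks, assumed nibble-wide */
		assert((1 << ((byte_field & 0x0f) + 5)) == 128);	/* CQE size */
		assert((1 << (((byte_field & 0xf0) >> 4) + 5)) == 128);	/* EQE size */
		return 0;
	}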