Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 9b3c520e authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'qed-Improve-performance-on-100G-link-for-offload-protocols'



Michal Kalderon says:

====================
qed*: Improve performance on 100G link for offload protocols

This patch series modifies the current implementation of PF selection.
The refactoring of the llh code enables setting additional filters
(mac / protocol) per PF, and improves performance for offload protocols
(RoCE, iWARP, iSCSI, fcoe) on 100G link (was capped at 90G per single
PF).

Improved performance on 100G link is achieved by configuring engine
affinty to each PF.
The engine affinity is read from the Management FW and hw is configured accordingly.
A new hw resource called PPFID is exposed and an API is introduced to utilize
it. This additional resource enables setting the affinity of a PF and providing
more classification rules per PF.
qedr,qedi,qedf are also modified as part of the series. Without the
changes functionality is broken.

v1 --> v2
---------
- Remove iWARP module parameter. Instead use devlink param infrastructure
  for setting the iwarp_cmt mode. Additional patch added to the series for
  adding the devlink support.

- Fix kbuild test robot warning on qed_llh_filter initialization.

- Remove comments inside function calls
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents cecd7582 a9b02c61
Loading
Loading
Loading
Loading
+22 −3
Original line number Diff line number Diff line
@@ -312,7 +312,8 @@ static void qedr_free_mem_sb(struct qedr_dev *dev,
			     struct qed_sb_info *sb_info, int sb_id)
{
	if (sb_info->sb_virt) {
		dev->ops->common->sb_release(dev->cdev, sb_info, sb_id);
		dev->ops->common->sb_release(dev->cdev, sb_info, sb_id,
					     QED_SB_TYPE_CNQ);
		dma_free_coherent(&dev->pdev->dev, sizeof(*sb_info->sb_virt),
				  (void *)sb_info->sb_virt, sb_info->sb_phys);
	}
@@ -504,11 +505,13 @@ static irqreturn_t qedr_irq_handler(int irq, void *handle)
static void qedr_sync_free_irqs(struct qedr_dev *dev)
{
	u32 vector;
	u16 idx;
	int i;

	for (i = 0; i < dev->int_info.used_cnt; i++) {
		if (dev->int_info.msix_cnt) {
			vector = dev->int_info.msix[i * dev->num_hwfns].vector;
			idx = i * dev->num_hwfns + dev->affin_hwfn_idx;
			vector = dev->int_info.msix[idx].vector;
			synchronize_irq(vector);
			free_irq(vector, &dev->cnq_array[i]);
		}
@@ -520,6 +523,7 @@ static void qedr_sync_free_irqs(struct qedr_dev *dev)
static int qedr_req_msix_irqs(struct qedr_dev *dev)
{
	int i, rc = 0;
	u16 idx;

	if (dev->num_cnq > dev->int_info.msix_cnt) {
		DP_ERR(dev,
@@ -529,7 +533,8 @@ static int qedr_req_msix_irqs(struct qedr_dev *dev)
	}

	for (i = 0; i < dev->num_cnq; i++) {
		rc = request_irq(dev->int_info.msix[i * dev->num_hwfns].vector,
		idx = i * dev->num_hwfns + dev->affin_hwfn_idx;
		rc = request_irq(dev->int_info.msix[idx].vector,
				 qedr_irq_handler, 0, dev->cnq_array[i].name,
				 &dev->cnq_array[i]);
		if (rc) {
@@ -866,6 +871,16 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev,
	dev->user_dpm_enabled = dev_info.user_dpm_enabled;
	dev->rdma_type = dev_info.rdma_type;
	dev->num_hwfns = dev_info.common.num_hwfns;

	if (IS_IWARP(dev) && QEDR_IS_CMT(dev)) {
		rc = dev->ops->iwarp_set_engine_affin(cdev, false);
		if (rc) {
			DP_ERR(dev, "iWARP is disabled over a 100g device Enabling it may impact L2 performance. To enable it run devlink dev param set <dev> name iwarp_cmt value true cmode runtime\n");
			goto init_err;
		}
	}
	dev->affin_hwfn_idx = dev->ops->common->get_affin_hwfn_idx(cdev);

	dev->rdma_ctx = dev->ops->rdma_get_rdma_ctx(cdev);

	dev->num_cnq = dev->ops->rdma_get_min_cnq_msix(cdev);
@@ -926,6 +941,10 @@ static void qedr_remove(struct qedr_dev *dev)
	qedr_stop_hw(dev);
	qedr_sync_free_irqs(dev);
	qedr_free_resources(dev);

	if (IS_IWARP(dev) && QEDR_IS_CMT(dev))
		dev->ops->iwarp_set_engine_affin(dev->cdev, true);

	ib_dealloc_device(&dev->ibdev);
}

+2 −0
Original line number Diff line number Diff line
@@ -157,6 +157,8 @@ struct qedr_dev {
	u32			dp_module;
	u8			dp_level;
	u8			num_hwfns;
#define QEDR_IS_CMT(dev)        ((dev)->num_hwfns > 1)
	u8			affin_hwfn_idx;
	u8			gsi_ll2_handle;

	uint			wq_multiplier;
+22 −2
Original line number Diff line number Diff line
@@ -140,6 +140,7 @@ struct qed_cxt_mngr;
struct qed_sb_sp_info;
struct qed_ll2_info;
struct qed_mcp_info;
struct qed_llh_info;

struct qed_rt_data {
	u32	*init_val;
@@ -741,6 +742,7 @@ struct qed_dev {
#define QED_DEV_ID_MASK		0xff00
#define QED_DEV_ID_MASK_BB	0x1600
#define QED_DEV_ID_MASK_AH	0x8000
#define QED_IS_E4(dev)  (QED_IS_BB(dev) || QED_IS_AH(dev))

	u16	chip_num;
#define CHIP_NUM_MASK                   0xffff
@@ -801,6 +803,11 @@ struct qed_dev {
	u8				num_hwfns;
	struct qed_hwfn			hwfns[MAX_HWFNS_PER_DEVICE];

	/* Engine affinity */
	u8				l2_affin_hint;
	u8				fir_affin;
	u8				iwarp_affin;

	/* SRIOV */
	struct qed_hw_sriov_info *p_iov_info;
#define IS_QED_SRIOV(cdev)              (!!(cdev)->p_iov_info)
@@ -815,6 +822,10 @@ struct qed_dev {
	/* Recovery */
	bool recov_in_prog;

	/* LLH info */
	u8 ppfid_bitmap;
	struct qed_llh_info *p_llh_info;

	/* Linux specific here */
	struct  qede_dev		*edev;
	struct  pci_dev			*pdev;
@@ -852,6 +863,9 @@ struct qed_dev {
	u32 rdma_max_inline;
	u32 rdma_max_srq_sge;
	u16 tunn_feature_mask;

	struct devlink			*dl;
	bool				iwarp_cmt;
};

#define NUM_OF_VFS(dev)         (QED_IS_BB(dev) ? MAX_NUM_VFS_BB \
@@ -904,6 +918,14 @@ void qed_set_fw_mac_addr(__le16 *fw_msb,
			 __le16 *fw_mid, __le16 *fw_lsb, u8 *mac);

#define QED_LEADING_HWFN(dev)   (&dev->hwfns[0])
#define QED_IS_CMT(dev)		((dev)->num_hwfns > 1)
/* Macros for getting the engine-affinitized hwfn (FIR: fcoe,iscsi,roce) */
#define QED_FIR_AFFIN_HWFN(dev)		(&(dev)->hwfns[dev->fir_affin])
#define QED_IWARP_AFFIN_HWFN(dev)       (&(dev)->hwfns[dev->iwarp_affin])
#define QED_AFFIN_HWFN(dev)				   \
	(QED_IS_IWARP_PERSONALITY(QED_LEADING_HWFN(dev)) ? \
	 QED_IWARP_AFFIN_HWFN(dev) : QED_FIR_AFFIN_HWFN(dev))
#define QED_AFFIN_HWFN_IDX(dev) (IS_LEAD_HWFN(QED_AFFIN_HWFN(dev)) ? 0 : 1)

/* Flags for indication of required queues */
#define PQ_FLAGS_RLS    (BIT(0))
@@ -923,8 +945,6 @@ u16 qed_get_cm_pq_idx_vf(struct qed_hwfn *p_hwfn, u16 vf);
u16 qed_get_cm_pq_idx_ofld_mtc(struct qed_hwfn *p_hwfn, u8 tc);
u16 qed_get_cm_pq_idx_llt_mtc(struct qed_hwfn *p_hwfn, u8 tc);

#define QED_LEADING_HWFN(dev)   (&dev->hwfns[0])

/* doorbell recovery mechanism */
void qed_db_recovery_dp(struct qed_hwfn *p_hwfn);
void qed_db_recovery_execute(struct qed_hwfn *p_hwfn);
+3 −2
Original line number Diff line number Diff line
@@ -2351,7 +2351,8 @@ qed_cxt_dynamic_ilt_alloc(struct qed_hwfn *p_hwfn,

	/* Write via DMAE since the PSWRQ2_REG_ILT_MEMORY line is a wide-bus */
	qed_dmae_host2grc(p_hwfn, p_ptt, (u64) (uintptr_t)&ilt_hw_entry,
			  reg_offset, sizeof(ilt_hw_entry) / sizeof(u32), 0);
			  reg_offset, sizeof(ilt_hw_entry) / sizeof(u32),
			  NULL);

	if (elem_type == QED_ELEM_CXT) {
		u32 last_cid_allocated = (1 + (iid / elems_per_p)) *
@@ -2457,7 +2458,7 @@ qed_cxt_free_ilt_range(struct qed_hwfn *p_hwfn,
				  (u64) (uintptr_t) &ilt_hw_entry,
				  reg_offset,
				  sizeof(ilt_hw_entry) / sizeof(u32),
				  0);
				  NULL);
	}

	qed_ptt_release(p_hwfn, p_ptt);
+1 −1
Original line number Diff line number Diff line
@@ -2537,7 +2537,7 @@ static u32 qed_grc_dump_addr_range(struct qed_hwfn *p_hwfn,
	    (len >= s_platform_defs[dev_data->platform_id].dmae_thresh ||
	     wide_bus)) {
		if (!qed_dmae_grc2host(p_hwfn, p_ptt, DWORDS_TO_BYTES(addr),
				       (u64)(uintptr_t)(dump_buf), len, 0))
				       (u64)(uintptr_t)(dump_buf), len, NULL))
			return len;
		dev_data->use_dmae = 0;
		DP_VERBOSE(p_hwfn,
Loading