Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b1a49412 authored by Xiang Chen's avatar Xiang Chen Committed by Martin K. Petersen
Browse files

scsi: hisi_sas: optimise the usage of hisi_hba.lock



Currently hisi_hba.lock is locked to deliver and receive a command
to/from any hw queue. This causes much contention at high data-rates.

To boost performance, lock on a per queue basis for sending and
receiving commands to/from hw.

Certain critical regions still need to be locked in the delivery and
completion stages with hisi_hba.lock.

New element hisi_sas_device.dq is added to store the delivery queue for
a device, so it does not need to be needlessly re-calculated for every
task.

Signed-off-by: default avatarXiang Chen <chenxiang66@hisilicon.com>
Signed-off-by: default avatarJohn Garry <john.garry@huawei.com>
Signed-off-by: default avatarMartin K. Petersen <martin.petersen@oracle.com>
parent ad604832
Loading
Loading
Loading
Loading
+5 −4
Original line number Diff line number Diff line
@@ -102,6 +102,8 @@ struct hisi_sas_cq {

struct hisi_sas_dq {
	struct hisi_hba *hisi_hba;
	struct hisi_sas_slot	*slot_prep;
	spinlock_t lock;
	int	wr_point;
	int	id;
};
@@ -109,6 +111,7 @@ struct hisi_sas_dq {
struct hisi_sas_device {
	struct hisi_hba		*hisi_hba;
	struct domain_device	*sas_device;
	struct hisi_sas_dq	*dq;
	struct list_head	list;
	u64 attached_phy;
	atomic64_t running_req;
@@ -154,9 +157,8 @@ struct hisi_sas_hw {
				struct domain_device *device);
	struct hisi_sas_device *(*alloc_dev)(struct domain_device *device);
	void (*sl_notify)(struct hisi_hba *hisi_hba, int phy_no);
	int (*get_free_slot)(struct hisi_hba *hisi_hba, u32 dev_id,
			int *q, int *s);
	void (*start_delivery)(struct hisi_hba *hisi_hba);
	int (*get_free_slot)(struct hisi_hba *hisi_hba, struct hisi_sas_dq *dq);
	void (*start_delivery)(struct hisi_sas_dq *dq);
	int (*prep_ssp)(struct hisi_hba *hisi_hba,
			struct hisi_sas_slot *slot, int is_tmf,
			struct hisi_sas_tmf_task *tmf);
@@ -217,7 +219,6 @@ struct hisi_hba {
	struct hisi_sas_port port[HISI_SAS_MAX_PHYS];

	int	queue_count;
	struct hisi_sas_slot	*slot_prep;

	struct dma_pool *sge_page_pool;
	struct hisi_sas_device	devices[HISI_SAS_MAX_DEVICES];
+47 −22
Original line number Diff line number Diff line
@@ -179,10 +179,11 @@ static void hisi_sas_slot_abort(struct work_struct *work)
		task->task_done(task);
}

static int hisi_sas_task_prep(struct sas_task *task, struct hisi_hba *hisi_hba,
			      int is_tmf, struct hisi_sas_tmf_task *tmf,
static int hisi_sas_task_prep(struct sas_task *task, struct hisi_sas_dq
		*dq, int is_tmf, struct hisi_sas_tmf_task *tmf,
		int *pass)
{
	struct hisi_hba *hisi_hba = dq->hisi_hba;
	struct domain_device *device = task->dev;
	struct hisi_sas_device *sas_dev = device->lldd_dev;
	struct hisi_sas_port *port;
@@ -240,18 +241,24 @@ static int hisi_sas_task_prep(struct sas_task *task, struct hisi_hba *hisi_hba,
	} else
		n_elem = task->num_scatter;

	spin_lock_irqsave(&hisi_hba->lock, flags);
	if (hisi_hba->hw->slot_index_alloc)
		rc = hisi_hba->hw->slot_index_alloc(hisi_hba, &slot_idx,
						    device);
	else
		rc = hisi_sas_slot_index_alloc(hisi_hba, &slot_idx);
	if (rc)
	if (rc) {
		spin_unlock_irqrestore(&hisi_hba->lock, flags);
		goto err_out;
	rc = hisi_hba->hw->get_free_slot(hisi_hba, sas_dev->device_id,
					&dlvry_queue, &dlvry_queue_slot);
	}
	spin_unlock_irqrestore(&hisi_hba->lock, flags);

	rc = hisi_hba->hw->get_free_slot(hisi_hba, dq);
	if (rc)
		goto err_out_tag;

	dlvry_queue = dq->id;
	dlvry_queue_slot = dq->wr_point;
	slot = &hisi_hba->slot_info[slot_idx];
	memset(slot, 0, sizeof(struct hisi_sas_slot));

@@ -316,7 +323,7 @@ static int hisi_sas_task_prep(struct sas_task *task, struct hisi_hba *hisi_hba,
	task->task_state_flags |= SAS_TASK_AT_INITIATOR;
	spin_unlock_irqrestore(&task->task_state_lock, flags);

	hisi_hba->slot_prep = slot;
	dq->slot_prep = slot;

	atomic64_inc(&sas_dev->running_req);
	++(*pass);
@@ -335,7 +342,9 @@ static int hisi_sas_task_prep(struct sas_task *task, struct hisi_hba *hisi_hba,
err_out_slot_buf:
	/* Nothing to be done */
err_out_tag:
	spin_lock_irqsave(&hisi_hba->lock, flags);
	hisi_sas_slot_index_free(hisi_hba, slot_idx);
	spin_unlock_irqrestore(&hisi_hba->lock, flags);
err_out:
	dev_err(dev, "task prep: failed[%d]!\n", rc);
	if (!sas_protocol_ata(task->task_proto))
@@ -354,19 +363,22 @@ static int hisi_sas_task_exec(struct sas_task *task, gfp_t gfp_flags,
	unsigned long flags;
	struct hisi_hba *hisi_hba = dev_to_hisi_hba(task->dev);
	struct device *dev = &hisi_hba->pdev->dev;
	struct domain_device *device = task->dev;
	struct hisi_sas_device *sas_dev = device->lldd_dev;
	struct hisi_sas_dq *dq = sas_dev->dq;

	if (unlikely(test_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags)))
		return -EINVAL;

	/* protect task_prep and start_delivery sequence */
	spin_lock_irqsave(&hisi_hba->lock, flags);
	rc = hisi_sas_task_prep(task, hisi_hba, is_tmf, tmf, &pass);
	spin_lock_irqsave(&dq->lock, flags);
	rc = hisi_sas_task_prep(task, dq, is_tmf, tmf, &pass);
	if (rc)
		dev_err(dev, "task exec: failed[%d]!\n", rc);

	if (likely(pass))
		hisi_hba->hw->start_delivery(hisi_hba);
	spin_unlock_irqrestore(&hisi_hba->lock, flags);
		hisi_hba->hw->start_delivery(dq);
	spin_unlock_irqrestore(&dq->lock, flags);

	return rc;
}
@@ -421,12 +433,16 @@ static struct hisi_sas_device *hisi_sas_alloc_dev(struct domain_device *device)
	spin_lock(&hisi_hba->lock);
	for (i = 0; i < HISI_SAS_MAX_DEVICES; i++) {
		if (hisi_hba->devices[i].dev_type == SAS_PHY_UNUSED) {
			int queue = i % hisi_hba->queue_count;
			struct hisi_sas_dq *dq = &hisi_hba->dq[queue];

			hisi_hba->devices[i].device_id = i;
			sas_dev = &hisi_hba->devices[i];
			sas_dev->dev_status = HISI_SAS_DEV_NORMAL;
			sas_dev->dev_type = device->dev_type;
			sas_dev->hisi_hba = hisi_hba;
			sas_dev->sas_device = device;
			sas_dev->dq = dq;
			INIT_LIST_HEAD(&hisi_hba->devices[i].list);
			break;
		}
@@ -1140,8 +1156,9 @@ hisi_sas_internal_abort_task_exec(struct hisi_hba *hisi_hba, int device_id,
	struct hisi_sas_slot *slot;
	struct asd_sas_port *sas_port = device->port;
	struct hisi_sas_cmd_hdr *cmd_hdr_base;
	struct hisi_sas_dq *dq = sas_dev->dq;
	int dlvry_queue_slot, dlvry_queue, n_elem = 0, rc, slot_idx;
	unsigned long flags;
	unsigned long flags, flags_dq;

	if (unlikely(test_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags)))
		return -EINVAL;
@@ -1152,14 +1169,22 @@ hisi_sas_internal_abort_task_exec(struct hisi_hba *hisi_hba, int device_id,
	port = to_hisi_sas_port(sas_port);

	/* simply get a slot and send abort command */
	spin_lock_irqsave(&hisi_hba->lock, flags);
	rc = hisi_sas_slot_index_alloc(hisi_hba, &slot_idx);
	if (rc)
	if (rc) {
		spin_unlock_irqrestore(&hisi_hba->lock, flags);
		goto err_out;
	rc = hisi_hba->hw->get_free_slot(hisi_hba, sas_dev->device_id,
					&dlvry_queue, &dlvry_queue_slot);
	}
	spin_unlock_irqrestore(&hisi_hba->lock, flags);

	spin_lock_irqsave(&dq->lock, flags_dq);
	rc = hisi_hba->hw->get_free_slot(hisi_hba, dq);
	if (rc)
		goto err_out_tag;

	dlvry_queue = dq->id;
	dlvry_queue_slot = dq->wr_point;

	slot = &hisi_hba->slot_info[slot_idx];
	memset(slot, 0, sizeof(struct hisi_sas_slot));

@@ -1186,17 +1211,21 @@ hisi_sas_internal_abort_task_exec(struct hisi_hba *hisi_hba, int device_id,
	task->task_state_flags |= SAS_TASK_AT_INITIATOR;
	spin_unlock_irqrestore(&task->task_state_lock, flags);

	hisi_hba->slot_prep = slot;
	dq->slot_prep = slot;

	atomic64_inc(&sas_dev->running_req);

	/* send abort command to our chip */
	hisi_hba->hw->start_delivery(hisi_hba);
	/* send abort command to the chip */
	hisi_hba->hw->start_delivery(dq);
	spin_unlock_irqrestore(&dq->lock, flags_dq);

	return 0;

err_out_tag:
	spin_lock_irqsave(&hisi_hba->lock, flags);
	hisi_sas_slot_index_free(hisi_hba, slot_idx);
	spin_unlock_irqrestore(&hisi_hba->lock, flags);
	spin_unlock_irqrestore(&dq->lock, flags_dq);
err_out:
	dev_err(dev, "internal abort task prep: failed[%d]!\n", rc);

@@ -1221,7 +1250,6 @@ hisi_sas_internal_task_abort(struct hisi_hba *hisi_hba,
	struct hisi_sas_device *sas_dev = device->lldd_dev;
	struct device *dev = &hisi_hba->pdev->dev;
	int res;
	unsigned long flags;

	if (!hisi_hba->hw->prep_abort)
		return -EOPNOTSUPP;
@@ -1238,11 +1266,8 @@ hisi_sas_internal_task_abort(struct hisi_hba *hisi_hba,
	task->slow_task->timer.expires = jiffies + msecs_to_jiffies(110);
	add_timer(&task->slow_task->timer);

	/* Lock as we are alloc'ing a slot, which cannot be interrupted */
	spin_lock_irqsave(&hisi_hba->lock, flags);
	res = hisi_sas_internal_abort_task_exec(hisi_hba, sas_dev->device_id,
						task, abort_flag, tag);
	spin_unlock_irqrestore(&hisi_hba->lock, flags);
	if (res) {
		del_timer(&task->slow_task->timer);
		dev_err(dev, "internal task abort: executing internal task failed: %d\n",
+8 −15
Original line number Diff line number Diff line
@@ -900,22 +900,17 @@ static int get_wideport_bitmap_v1_hw(struct hisi_hba *hisi_hba, int port_id)
	return bitmap;
}

/**
 * This function allocates across all queues to load balance.
 * Slots are allocated from queues in a round-robin fashion.
 *
/*
 * The callpath to this function and upto writing the write
 * queue pointer should be safe from interruption.
 */
static int get_free_slot_v1_hw(struct hisi_hba *hisi_hba, u32 dev_id,
				int *q, int *s)
static int
get_free_slot_v1_hw(struct hisi_hba *hisi_hba, struct hisi_sas_dq *dq)
{
	struct device *dev = &hisi_hba->pdev->dev;
	struct hisi_sas_dq *dq;
	int queue = dq->id;
	u32 r, w;
	int queue = dev_id % hisi_hba->queue_count;

	dq = &hisi_hba->dq[queue];
	w = dq->wr_point;
	r = hisi_sas_read32_relaxed(hisi_hba,
				DLVRY_Q_0_RD_PTR + (queue * 0x14));
@@ -924,16 +919,14 @@ static int get_free_slot_v1_hw(struct hisi_hba *hisi_hba, u32 dev_id,
		return -EAGAIN;
	}

	*q = queue;
	*s = w;
	return 0;
}

static void start_delivery_v1_hw(struct hisi_hba *hisi_hba)
static void start_delivery_v1_hw(struct hisi_sas_dq *dq)
{
	int dlvry_queue = hisi_hba->slot_prep->dlvry_queue;
	int dlvry_queue_slot = hisi_hba->slot_prep->dlvry_queue_slot;
	struct hisi_sas_dq *dq = &hisi_hba->dq[dlvry_queue];
	struct hisi_hba *hisi_hba = dq->hisi_hba;
	int dlvry_queue = dq->slot_prep->dlvry_queue;
	int dlvry_queue_slot = dq->slot_prep->dlvry_queue_slot;

	dq->wr_point = ++dlvry_queue_slot % HISI_SAS_QUEUE_SLOTS;
	hisi_sas_write32(hisi_hba, DLVRY_Q_0_WR_PTR + (dlvry_queue * 0x14),
+17 −17
Original line number Diff line number Diff line
@@ -695,6 +695,9 @@ hisi_sas_device *alloc_dev_quirk_v2_hw(struct domain_device *device)
		if (sata_dev && (i & 1))
			continue;
		if (hisi_hba->devices[i].dev_type == SAS_PHY_UNUSED) {
			int queue = i % hisi_hba->queue_count;
			struct hisi_sas_dq *dq = &hisi_hba->dq[queue];

			hisi_hba->devices[i].device_id = i;
			sas_dev = &hisi_hba->devices[i];
			sas_dev->dev_status = HISI_SAS_DEV_NORMAL;
@@ -702,6 +705,7 @@ hisi_sas_device *alloc_dev_quirk_v2_hw(struct domain_device *device)
			sas_dev->hisi_hba = hisi_hba;
			sas_dev->sas_device = device;
			sas_dev->sata_idx = sata_idx;
			sas_dev->dq = dq;
			INIT_LIST_HEAD(&hisi_hba->devices[i].list);
			break;
		}
@@ -1454,22 +1458,17 @@ static int get_wideport_bitmap_v2_hw(struct hisi_hba *hisi_hba, int port_id)
	return bitmap;
}

/**
 * This function allocates across all queues to load balance.
 * Slots are allocated from queues in a round-robin fashion.
 *
/*
 * The callpath to this function and upto writing the write
 * queue pointer should be safe from interruption.
 */
static int get_free_slot_v2_hw(struct hisi_hba *hisi_hba, u32 dev_id,
				int *q, int *s)
static int
get_free_slot_v2_hw(struct hisi_hba *hisi_hba, struct hisi_sas_dq *dq)
{
	struct device *dev = &hisi_hba->pdev->dev;
	struct hisi_sas_dq *dq;
	int queue = dq->id;
	u32 r, w;
	int queue = dev_id % hisi_hba->queue_count;

	dq = &hisi_hba->dq[queue];
	w = dq->wr_point;
	r = hisi_sas_read32_relaxed(hisi_hba,
				DLVRY_Q_0_RD_PTR + (queue * 0x14));
@@ -1479,16 +1478,14 @@ static int get_free_slot_v2_hw(struct hisi_hba *hisi_hba, u32 dev_id,
		return -EAGAIN;
	}

	*q = queue;
	*s = w;
	return 0;
}

static void start_delivery_v2_hw(struct hisi_hba *hisi_hba)
static void start_delivery_v2_hw(struct hisi_sas_dq *dq)
{
	int dlvry_queue = hisi_hba->slot_prep->dlvry_queue;
	int dlvry_queue_slot = hisi_hba->slot_prep->dlvry_queue_slot;
	struct hisi_sas_dq *dq = &hisi_hba->dq[dlvry_queue];
	struct hisi_hba *hisi_hba = dq->hisi_hba;
	int dlvry_queue = dq->slot_prep->dlvry_queue;
	int dlvry_queue_slot = dq->slot_prep->dlvry_queue_slot;

	dq->wr_point = ++dlvry_queue_slot % HISI_SAS_QUEUE_SLOTS;
	hisi_sas_write32(hisi_hba, DLVRY_Q_0_WR_PTR + (dlvry_queue * 0x14),
@@ -2344,7 +2341,9 @@ slot_complete_v2_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot)
	spin_lock_irqsave(&task->task_state_lock, flags);
	task->task_state_flags |= SAS_TASK_STATE_DONE;
	spin_unlock_irqrestore(&task->task_state_lock, flags);
	spin_lock_irqsave(&hisi_hba->lock, flags);
	hisi_sas_slot_task_free(hisi_hba, task, slot);
	spin_unlock_irqrestore(&hisi_hba->lock, flags);
	sts = ts->stat;

	if (task->task_done)
@@ -3162,13 +3161,14 @@ static void cq_tasklet_v2_hw(unsigned long val)
	struct hisi_sas_complete_v2_hdr *complete_queue;
	u32 rd_point = cq->rd_point, wr_point, dev_id;
	int queue = cq->id;
	struct hisi_sas_dq *dq = &hisi_hba->dq[queue];

	if (unlikely(hisi_hba->reject_stp_links_msk))
		phys_try_accept_stp_links_v2_hw(hisi_hba);

	complete_queue = hisi_hba->complete_hdr[queue];

	spin_lock(&hisi_hba->lock);
	spin_lock(&dq->lock);
	wr_point = hisi_sas_read32(hisi_hba, COMPL_Q_0_WR_PTR +
				   (0x14 * queue));

@@ -3218,7 +3218,7 @@ static void cq_tasklet_v2_hw(unsigned long val)
	/* update rd_point */
	cq->rd_point = rd_point;
	hisi_sas_write32(hisi_hba, COMPL_Q_0_RD_PTR + (0x14 * queue), rd_point);
	spin_unlock(&hisi_hba->lock);
	spin_unlock(&dq->lock);
}

static irqreturn_t cq_interrupt_v2_hw(int irq_no, void *p)