Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3975a4b2 authored by Gilad Broner's avatar Gilad Broner
Browse files

scsi: ufs: update pm qos implementation



Current UFS PM QoS design and implementation do not seem to give
the desired increase in performance. This change revisits the
PM QoS implementation trying to improve performance by making
the following changes:
* de-couple voting from the clock scaling decision, so voting
  occurs on the first request and unvoting on the completion
  of the last request, regardless of the clock scaling state.
  Otherwise, suspending the PM QoS voting during the time it takes
  to decide on clock up-scaling, seems to degrade random access
  performance.
* vote on a per-cluster basis by inspecting the request object's
  cpu field. This follows the soft-irq allocation scheme in the
  block layer, so the cpu to which the block layer schedules the
  soft-irq will not be put into deep LPM.
  We should note that PM QoS voting using cpu mask for specific
  cpus is a feature of the qcom specific PM QoS implementation.

Change-Id: I427d202aeb45cd284a3bb128e26e519212614801
Signed-off-by: default avatarGilad Broner <gbroner@codeaurora.org>
Signed-off-by: default avatarKrishna Konda <kkonda@codeaurora.org>
parent 819f45d3
Loading
Loading
Loading
Loading
+17 −17
Original line number Diff line number Diff line
@@ -115,23 +115,19 @@ Note: The instantaneous bandwidth (IB) value in the vectors-KBps field should
- qcom,bus-vector-names: specifies string IDs for the corresponding
bus vectors in the same order as qcom,msm-bus,vectors-KBps property.

- qcom,cpu-dma-latency-us: optional parameter specifying the allowed CPU-DMA
  latency parameter for PM QOS, in units of microseconds. If this parameter is
  not specified a default of 200us is used.
- qcom,cpu-affinity: this is a string that specifies the pm QoS request type.
  The supported cpu affinity modes are:
  "all_cores" - PM_QOS_REQ_ALL_CORES is applicable to all CPU cores that are
  online and this would have a power impact when there are more number of CPUs.
  "affine_irq" - PM_QOS_REQ_AFFINE_IRQ request type shall update/apply the vote
  only to that CPU to which this IRQ's affinity is set to.
  "affine_cores" - PM_QOS_REQ_AFFINE_CORES request type is used for targets that
  have little cluster and will update/apply the vote to all the cores in the
  little cluster.
  The default CPU affinity mode is PM_QOS_REQ_AFFINE_IRQ.
- qcom,cpu-affinity-mask: this property is taken into consideration only in case
  "affine_cores" is specified for qcom,cpu-affinity. It specifies which cores the
  PM QoS voting should apply to. In practice, for system with big / little cluster
  configuration, this should specify the cores of the little cluster.
* The following parameters are optional, but required in order for PM QoS to be
enabled and functional in the driver:
- qcom,pm-qos-cpu-groups:		arrays of unsigned integers representing the cpu groups.
					The number of values in the array defines the number of cpu-groups.
					Each value is a bit-mask defining the cpus that take part in that cpu group.
					i.e. if bit N is set, then cpuN is a part of the cpu group. So basically,
					a cpu group corresponds to a cpu cluster.
					A PM QoS request object is maintained for each cpu-group.
- qcom,pm-qos-cpu-group-latency-us:	array of values used for PM QoS voting, one for each cpu-group defined.
					The number of values must match the number of values defined in the
					qcom,pm-qos-cpu-groups property.
- qcom,pm-qos-default-cpu:		PM QoS voting is based on the cpu associated with each IO request by the block layer.
					This defines the default cpu used for PM QoS voting in case a specific cpu value is not available.

Example:
	ufshc@0xfc598000 {
@@ -176,6 +172,10 @@ Example:
					"HS_RB_G1_L1", "HS_RB_G2_L1", "HS_RB_G3_L1",
					"HS_RB_G1_L2", "HS_RB_G2_L2", "HS_RB_G3_L2",
					"MAX";

		qcom,pm-qos-cpu-groups = <0x03 0x0C>; /* group0: cpu0, cpu1, group1: cpu2, cpu3 */
		qcom,pm-qos-cpu-group-latency-us = <200 300>; /* group0: 200us, group1: 300us */
		qcom,pm-qos-default-cpu = <0>;
	};

This is an example of a variant sub-node of ufshc:
+273 −0
Original line number Diff line number Diff line
@@ -29,6 +29,9 @@
#include "ufs-qcom-debugfs.h"
#include <linux/clk/msm-clk.h>

/* TODO: further tuning for this parameter may be required */
#define UFS_QCOM_PM_QOS_UNVOTE_TIMEOUT_US	(10000) /* microseconds */

#define UFS_QCOM_DEFAULT_DBG_PRINT_EN	\
	(UFS_QCOM_DBG_PRINT_REGS_EN | UFS_QCOM_DBG_PRINT_TEST_BUS_EN)

@@ -58,6 +61,9 @@ static int ufs_qcom_update_sec_cfg(struct ufs_hba *hba, bool restore_sec_cfg);
static void ufs_qcom_get_default_testbus_cfg(struct ufs_qcom_host *host);
static int ufs_qcom_set_dme_vs_core_clk_ctrl_clear_div(struct ufs_hba *hba,
						       u32 clk_cycles);
static int ufs_qcom_pm_qos_init(struct ufs_qcom_host *host);
static void ufs_qcom_pm_qos_remove(struct ufs_qcom_host *host);
static void ufs_qcom_pm_qos_suspend(struct ufs_qcom_host *host);

static void ufs_qcom_dump_regs(struct ufs_hba *hba, int offset, int len,
		char *prefix)
@@ -619,6 +625,9 @@ static int ufs_qcom_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
		ufs_qcom_ice_suspend(host);
	}

	/* Unvote PM QoS */
	ufs_qcom_pm_qos_suspend(host);

out:
	return ret;
}
@@ -1269,6 +1278,259 @@ out:
	return err;
}

#ifdef CONFIG_SMP /* CONFIG_SMP */
/*
 * Look up the PM QoS group whose mask contains @cpu.
 * Returns the group index, or -1 when @cpu is out of range or belongs to no
 * configured group.
 */
static int __ufs_qcom_find_cpu_group(struct ufs_qcom_host *host, int cpu)
{
	int i;

	if (cpu < 0 || cpu >= num_possible_cpus())
		return -1;

	for (i = 0; i < host->pm_qos.num_groups; i++)
		if (cpumask_test_cpu(cpu, &host->pm_qos.groups[i].mask))
			return i;

	return -1;
}

/**
 * ufs_qcom_cpu_to_group - map a cpu number to an index into pm_qos.groups[]
 * @host: qcom host private data holding the cpu group table
 * @cpu: cpu number associated with the request; may be negative or otherwise
 *	not covered by any group
 *
 * The result is used by callers directly as an array index, so it must always
 * be a valid group index. host->pm_qos.default_cpu is a *cpu number*, not a
 * group index, hence it is translated to its group here instead of being
 * returned as-is. If neither @cpu nor the default cpu belongs to a configured
 * group, group 0 is used.
 *
 * Callers are expected to have checked that host->pm_qos.groups is set.
 */
static int ufs_qcom_cpu_to_group(struct ufs_qcom_host *host, int cpu)
{
	int group;

	group = __ufs_qcom_find_cpu_group(host, cpu);
	if (group < 0)
		group = __ufs_qcom_find_cpu_group(host,
				host->pm_qos.default_cpu);

	return group < 0 ? 0 : group;
}

static void ufs_qcom_pm_qos_req_start(struct ufs_hba *hba, struct request *req)
{
	unsigned long flags;
	struct ufs_qcom_host *host;
	struct ufs_qcom_pm_qos_cpu_group *group;

	if (!hba || !req)
		return;

	host = ufshcd_get_variant(hba);
	if (!host->pm_qos.groups)
		return;

	group = &host->pm_qos.groups[ufs_qcom_cpu_to_group(host, req->cpu)];

	spin_lock_irqsave(hba->host->host_lock, flags);
	if (!host->pm_qos.is_enabled)
		goto out;

	group->active_reqs++;
	if (group->state != PM_QOS_REQ_VOTE &&
			group->state != PM_QOS_VOTED) {
		group->state = PM_QOS_REQ_VOTE;
		queue_work(host->pm_qos.workq, &group->vote_work);
	}
out:
	spin_unlock_irqrestore(hba->host->host_lock, flags);
}

/*
 * Drop one active request from the cpu group that @req_cpu maps to and, when
 * that was the last one, mark the group as awaiting unvote and queue its
 * unvote work. No-op when PM QoS is not set up or not enabled.
 *
 * The caller must hold hba->host->host_lock.
 */
static void __ufs_qcom_pm_qos_req_end(struct ufs_qcom_host *host, int req_cpu)
{
	struct ufs_qcom_pm_qos_cpu_group *grp;
	int idx;

	if (!(host->pm_qos.groups && host->pm_qos.is_enabled))
		return;

	idx = ufs_qcom_cpu_to_group(host, req_cpu);
	grp = &host->pm_qos.groups[idx];

	grp->active_reqs--;
	if (grp->active_reqs == 0) {
		grp->state = PM_QOS_REQ_UNVOTE;
		queue_work(host->pm_qos.workq, &grp->unvote_work);
	}
}

/**
 * ufs_qcom_pm_qos_req_end - account for a finished request
 * @hba: per adapter instance
 * @req: block layer request that completed (or failed to be issued)
 * @should_lock: true when the caller does not already hold
 *	hba->host->host_lock and it must be taken here
 */
static void ufs_qcom_pm_qos_req_end(struct ufs_hba *hba, struct request *req,
	bool should_lock)
{
	unsigned long irq_flags;

	if (!hba || !req)
		return;

	if (!should_lock) {
		/* Caller already owns the host lock */
		__ufs_qcom_pm_qos_req_end(ufshcd_get_variant(hba), req->cpu);
		return;
	}

	spin_lock_irqsave(hba->host->host_lock, irq_flags);
	__ufs_qcom_pm_qos_req_end(ufshcd_get_variant(hba), req->cpu);
	spin_unlock_irqrestore(hba->host->host_lock, irq_flags);
}

static void ufs_qcom_pm_qos_vote_work(struct work_struct *work)
{
	struct ufs_qcom_pm_qos_cpu_group *group =
		container_of(work, struct ufs_qcom_pm_qos_cpu_group, vote_work);
	struct ufs_qcom_host *host = group->host;
	unsigned long flags;

	spin_lock_irqsave(host->hba->host->host_lock, flags);

	if (!host->pm_qos.is_enabled || !group->active_reqs) {
		spin_unlock_irqrestore(host->hba->host->host_lock, flags);
		return;
	}

	group->state = PM_QOS_VOTED;
	spin_unlock_irqrestore(host->hba->host->host_lock, flags);

	pm_qos_update_request(&group->req, group->latency_us);
}

static void ufs_qcom_pm_qos_unvote_work(struct work_struct *work)
{
	struct ufs_qcom_pm_qos_cpu_group *group = container_of(work,
		struct ufs_qcom_pm_qos_cpu_group, unvote_work);
	struct ufs_qcom_host *host = group->host;
	unsigned long flags;

	/*
	 * Check if new requests were submitted in the meantime and do not
	 * unvote if so.
	 */
	spin_lock_irqsave(host->hba->host->host_lock, flags);

	if (!host->pm_qos.is_enabled || group->active_reqs) {
		spin_unlock_irqrestore(host->hba->host->host_lock, flags);
		return;
	}

	group->state = PM_QOS_UNVOTED;
	spin_unlock_irqrestore(host->hba->host->host_lock, flags);

	pm_qos_update_request_timeout(&group->req,
		group->latency_us, UFS_QCOM_PM_QOS_UNVOTE_TIMEOUT_US);
}

static int ufs_qcom_pm_qos_init(struct ufs_qcom_host *host)
{
	struct device_node *node = host->hba->dev->of_node;
	int ret = 0;
	int num_groups;
	int num_values;
	char wq_name[sizeof("ufs_pm_qos_00")];
	int i;

	num_groups = of_property_count_u32_elems(node,
		"qcom,pm-qos-cpu-groups");
	if (num_groups <= 0)
		goto no_pm_qos;

	num_values = of_property_count_u32_elems(node,
		"qcom,pm-qos-cpu-group-latency-us");
	if (num_values <= 0)
		goto no_pm_qos;

	if (num_values != num_groups || num_groups > num_possible_cpus()) {
		dev_err(host->hba->dev, "%s: invalid count: num_groups=%d, num_values=%d, num_possible_cpus=%d\n",
			__func__, num_groups, num_values, num_possible_cpus());
		goto no_pm_qos;
	}

	host->pm_qos.num_groups = num_groups;
	host->pm_qos.groups = kcalloc(host->pm_qos.num_groups,
			sizeof(struct ufs_qcom_pm_qos_cpu_group), GFP_KERNEL);
	if (!host->pm_qos.groups)
		return -ENOMEM;

	for (i = 0; i < host->pm_qos.num_groups; i++) {
		u32 mask;

		ret = of_property_read_u32_index(node, "qcom,pm-qos-cpu-groups",
			i, &mask);
		if (ret)
			goto free_groups;
		host->pm_qos.groups[i].mask.bits[0] = mask;
		if (!cpumask_subset(&host->pm_qos.groups[i].mask,
			cpu_possible_mask)) {
			dev_err(host->hba->dev, "%s: invalid mask 0x%x for cpu group\n",
				__func__, mask);
			goto free_groups;
		}

		ret = of_property_read_u32_index(node,
			"qcom,pm-qos-cpu-group-latency-us", i,
			&host->pm_qos.groups[i].latency_us);
		if (ret)
			goto free_groups;

		host->pm_qos.groups[i].req.type = PM_QOS_REQ_AFFINE_CORES;
		host->pm_qos.groups[i].req.cpus_affine =
			host->pm_qos.groups[i].mask;
		host->pm_qos.groups[i].state = PM_QOS_UNVOTED;
		host->pm_qos.groups[i].active_reqs = 0;
		host->pm_qos.groups[i].host = host;

		INIT_WORK(&host->pm_qos.groups[i].vote_work,
			ufs_qcom_pm_qos_vote_work);
		INIT_WORK(&host->pm_qos.groups[i].unvote_work,
			ufs_qcom_pm_qos_unvote_work);
	}

	ret = of_property_read_u32(node, "qcom,pm-qos-default-cpu",
		&host->pm_qos.default_cpu);
	if (ret || host->pm_qos.default_cpu > num_possible_cpus())
		host->pm_qos.default_cpu = 0;

	/*
	 * Use a single-threaded workqueue to assure work submitted to the queue
	 * is performed in order. Consider the following 2 possible cases:
	 *
	 * 1. A new request arrives and voting work is scheduled for it. Before
	 *    the voting work is performed the request is finished and unvote
	 *    work is also scheduled.
	 * 2. A request is finished and unvote work is scheduled. Before the
	 *    work is performed a new request arrives and voting work is also
	 *    scheduled.
	 *
	 * In both cases a vote work and unvote work wait to be performed.
	 * If ordering is not guaranteed, then the end state might be the
	 * opposite of the desired state.
	 */
	snprintf(wq_name, ARRAY_SIZE(wq_name), "%s_%d", "ufs_pm_qos",
		host->hba->host->host_no);
	host->pm_qos.workq = create_singlethread_workqueue(wq_name);
	if (!host->pm_qos.workq) {
		dev_err(host->hba->dev, "%s: failed to create the workqueue\n",
				__func__);
		ret = -ENOMEM;
		goto free_groups;
	}

	/* Initialization was ok, add all PM QoS requests */
	for (i = 0; i < host->pm_qos.num_groups; i++)
		pm_qos_add_request(&host->pm_qos.groups[i].req,
			PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE);

	host->pm_qos.is_enabled = true;

	return 0;

free_groups:
	kfree(host->pm_qos.groups);
no_pm_qos:
	host->pm_qos.groups = NULL;
	return ret ? ret : -ENOTSUPP;
}

/**
 * ufs_qcom_pm_qos_suspend - wait for pending PM QoS unvote work on suspend
 * @host: qcom host private data
 *
 * Flushes the unvote work of every cpu group. Returns immediately when the
 * group array was never allocated (PM QoS not configured, or its
 * initialization failed), since num_groups may be non-zero in that case and
 * the loop would otherwise dereference a NULL array.
 */
static void ufs_qcom_pm_qos_suspend(struct ufs_qcom_host *host)
{
	int i;

	if (!host->pm_qos.groups)
		return;

	for (i = 0; i < host->pm_qos.num_groups; i++)
		flush_work(&host->pm_qos.groups[i].unvote_work);
}

static void ufs_qcom_pm_qos_remove(struct ufs_qcom_host *host)
{
	int i;

	if (!host->pm_qos.groups)
		return;

	for (i = 0; i < host->pm_qos.num_groups; i++)
		pm_qos_remove_request(&host->pm_qos.groups[i].req);
	destroy_workqueue(host->pm_qos.workq);

	kfree(host->pm_qos.groups);
	host->pm_qos.groups = NULL;
}
#endif /* CONFIG_SMP */

#define	ANDROID_BOOT_DEV_MAX	30
static char android_boot_dev[ANDROID_BOOT_DEV_MAX];

@@ -1345,6 +1607,10 @@ static int ufs_qcom_init(struct ufs_hba *hba)
		goto out;
	}

	err = ufs_qcom_pm_qos_init(host);
	if (err)
		dev_info(dev, "%s: PM QoS will be disabled\n", __func__);

	/* restore the secure configuration */
	ufs_qcom_update_sec_cfg(hba, true);

@@ -1430,6 +1696,7 @@ static void ufs_qcom_exit(struct ufs_hba *hba)
	msm_bus_scale_unregister_client(host->bus_vote.client_handle);
	ufs_qcom_disable_lane_clks(host);
	phy_power_off(host->generic_phy);
	ufs_qcom_pm_qos_remove(host);
}

static int ufs_qcom_set_dme_vs_core_clk_ctrl_clear_div(struct ufs_hba *hba,
@@ -1836,10 +2103,16 @@ static struct ufs_hba_crypto_variant_ops ufs_hba_crypto_variant_ops = {
	.crypto_engine_reset_err = ufs_qcom_crypto_engine_reset_err,
};

/*
 * PM QoS variant hooks: req_start is invoked when a request is issued and
 * req_end when it finishes, so votes follow per-request activity.
 */
static struct ufs_hba_pm_qos_variant_ops ufs_hba_pm_qos_variant_ops = {
	.req_start	= ufs_qcom_pm_qos_req_start,
	.req_end	= ufs_qcom_pm_qos_req_end,
};

/* qcom variant descriptor bundling the generic, crypto and PM QoS op tables */
static struct ufs_hba_variant ufs_hba_qcom_variant = {
	.name		= "qcom",
	.vops		= &ufs_hba_qcom_vops,
	.crypto_vops	= &ufs_hba_crypto_variant_ops,
	.pm_qos_vops	= &ufs_hba_pm_qos_variant_ops,
};

/**
+53 −0
Original line number Diff line number Diff line
@@ -15,6 +15,7 @@
#define UFS_QCOM_H_

#include <linux/phy/phy.h>
#include <linux/pm_qos.h>
#include "ufshcd.h"

#define MAX_UFS_QCOM_HOSTS	1
@@ -234,6 +235,54 @@ struct ufs_qcom_testbus {
	u8 select_minor;
};

/*
 * PM QoS voting state of a cpu group.
 *
 * The REQ_* values mark that the corresponding work item has been queued but
 * has not yet applied the change; the other two are the settled states.
 */
enum ufs_qcom_pm_qos_state {
	PM_QOS_UNVOTED,		/* no vote in place (initial state) */
	PM_QOS_VOTED,		/* vote work has applied the group latency */
	PM_QOS_REQ_VOTE,	/* vote work queued, not yet applied */
	PM_QOS_REQ_UNVOTE,	/* unvote work queued, not yet applied */
};

/**
 * struct ufs_qcom_pm_qos_cpu_group - data related to cluster PM QoS voting
 *	logic
 * @req: request object for PM QoS
 * @vote_work: work object for voting procedure
 * @unvote_work: work object for un-voting procedure
 * @host: back pointer to the main structure
 * @state: voting state machine current state
 * @latency_us: requested latency value used for cluster voting, in
 *	microseconds
 * @mask: cpu mask defined for this cluster
 * @active_reqs: number of active requests on this cluster
 *
 * NOTE(review): @state and @active_reqs appear to be updated only with the
 * SCSI host lock held by the voting code - confirm before adding new users.
 */
struct ufs_qcom_pm_qos_cpu_group {
	struct pm_qos_request req;
	struct work_struct vote_work;
	struct work_struct unvote_work;
	struct ufs_qcom_host *host;
	enum ufs_qcom_pm_qos_state state;
	s32 latency_us;
	cpumask_t mask;
	int active_reqs;
};

/**
 * struct ufs_qcom_pm_qos - data related to PM QoS voting logic
 * @groups: PM QoS cpu group state array
 * @workq: single threaded workqueue to run PM QoS voting/unvoting
 * @num_groups: number of cpu groups in @groups
 * @default_cpu: cpu to use for voting for request not specifying a cpu
 * @is_enabled: flag specifying whether voting logic is enabled
 */
struct ufs_qcom_pm_qos {
	struct ufs_qcom_pm_qos_cpu_group *groups;
	struct workqueue_struct *workq;
	int num_groups;
	int default_cpu;
	bool is_enabled;
};

struct ufs_qcom_host {
	/*
	 * Set this capability if host controller supports the QUniPro mode
@@ -258,6 +307,10 @@ struct ufs_qcom_host {
	struct clk *tx_l0_sync_clk;
	struct clk *rx_l1_sync_clk;
	struct clk *tx_l1_sync_clk;

	/* PM Quality-of-Service (QoS) data */
	struct ufs_qcom_pm_qos pm_qos;

	bool is_lane_clks_enabled;
	bool sec_cfg_updated;
	struct ufs_qcom_ice_data ice;
+0 −38
Original line number Diff line number Diff line
@@ -237,43 +237,6 @@ static void ufshcd_parse_pm_levels(struct ufs_hba *hba)
}

#ifdef CONFIG_SMP
/**
 * ufshcd_parse_pm_qos - read the PM QoS settings from the device tree node
 * @hba: per adapter instance whose pm_qos fields are filled in
 * @irq: interrupt number stored in the request for the affine-irq mode
 *
 * Starts from UFS_DEFAULT_CPU_DMA_LATENCY_US and PM_QOS_REQ_AFFINE_IRQ and
 * overrides them with "qcom,cpu-dma-latency-us", "qcom,cpu-affinity" and
 * "qcom,cpu-affinity-mask" when those properties can be read.
 */
static void ufshcd_parse_pm_qos(struct ufs_hba *hba, int irq)
{
	const char *cpu_affinity = NULL;
	u32 cpu_mask;

	/* The default stays in place if the property is absent */
	hba->pm_qos.cpu_dma_latency_us = UFS_DEFAULT_CPU_DMA_LATENCY_US;
	of_property_read_u32(hba->dev->of_node, "qcom,cpu-dma-latency-us",
		&hba->pm_qos.cpu_dma_latency_us);
	dev_dbg(hba->dev, "cpu_dma_latency_us = %u\n",
		hba->pm_qos.cpu_dma_latency_us);

	/* Default to affine irq in case parsing fails */
	hba->pm_qos.req.type = PM_QOS_REQ_AFFINE_IRQ;
	hba->pm_qos.req.irq = irq;
	if (!of_property_read_string(hba->dev->of_node, "qcom,cpu-affinity",
		&cpu_affinity)) {
		if (!strcmp(cpu_affinity, "all_cores"))
			hba->pm_qos.req.type = PM_QOS_REQ_ALL_CORES;
		else if (!strcmp(cpu_affinity, "affine_cores"))
			/*
			 * PM_QOS_REQ_AFFINE_CORES request type is used for
			 * targets that have little cluster and will apply
			 * the vote to all the cores in the little cluster.
			 * Without a readable mask the affine-irq default is
			 * kept.
			 */
			if (!of_property_read_u32(hba->dev->of_node,
				"qcom,cpu-affinity-mask", &cpu_mask)) {
				hba->pm_qos.req.type = PM_QOS_REQ_AFFINE_CORES;
				/* Convert u32 to cpu bit mask */
				cpumask_bits(&hba->pm_qos.req.cpus_affine)[0] =
					cpu_mask;
			}
	}

	dev_dbg(hba->dev, "hba->pm_qos.pm_qos_req.type = %u, cpu_mask=0x%lx\n",
		hba->pm_qos.req.type, hba->pm_qos.req.cpus_affine.bits[0]);
}

/**
 * ufshcd_pltfrm_suspend - suspend power management function
 * @dev: pointer to device handle
@@ -395,7 +358,6 @@ static int ufshcd_pltfrm_probe(struct platform_device *pdev)
		goto dealloc_host;
	}

	ufshcd_parse_pm_qos(hba, irq);
	ufshcd_parse_pm_levels(hba);

	if (!dev->dma_mask)
+15 −207
Original line number Diff line number Diff line
@@ -220,8 +220,6 @@ void ufshcd_update_query_stats(struct ufs_hba *hba,
/* IOCTL opcode for command - ufs set device read only */
#define UFS_IOCTL_BLKROSET      BLKROSET

#define UFSHCD_PM_QOS_UNVOTE_TIMEOUT_US	(10000) /* microseconds */

#define UFSHCD_DEFAULT_LANES_PER_DIRECTION		2

#define ufshcd_toggle_vreg(_dev, _vreg, _on)				\
@@ -609,9 +607,8 @@ static void ufshcd_print_host_state(struct ufs_hba *hba)
		hba->pm_op_in_progress, hba->is_sys_suspended);
	dev_err(hba->dev, "Auto BKOPS=%d, Host self-block=%d\n",
		hba->auto_bkops_enabled, hba->host->host_self_blocked);
	dev_err(hba->dev, "Clk gate=%d, hibern8 on idle=%d, PM QoS=%d\n",
		hba->clk_gating.state, hba->hibern8_on_idle.state,
		hba->pm_qos.state);
	dev_err(hba->dev, "Clk gate=%d, hibern8 on idle=%d\n",
		hba->clk_gating.state, hba->hibern8_on_idle.state);
	dev_err(hba->dev, "error handling flags=0x%x, req. abort count=%d\n",
		hba->eh_flags, hba->req_abort_count);
	dev_err(hba->dev, "Host capabilities=0x%x, caps=0x%x\n",
@@ -1734,173 +1731,15 @@ static void ufshcd_exit_hibern8_on_idle(struct ufs_hba *hba)
	device_remove_file(hba->dev, &hba->hibern8_on_idle.enable_attr);
}

#ifdef CONFIG_SMP

/**
 * ufshcd_pm_qos_hold - account for a new request and make sure PM QoS is voted
 * @hba: per adapter instance
 * @async: when true, do not wait for the voting work to finish
 *
 * Takes hba->host->host_lock itself, so the caller must NOT hold it.
 *
 * Returns 0 when no latency is configured, when voting is suspended or when
 * the vote is already in place; -EAGAIN when @async is set and the vote is
 * still pending (the active request count taken here is dropped again);
 * -EINVAL on an unknown state.
 */
static int ufshcd_pm_qos_hold(struct ufs_hba *hba, bool async)
{
	int ret = 0;
	unsigned long flags;

	if (!hba->pm_qos.cpu_dma_latency_us)
		return 0;

	spin_lock_irqsave(hba->host->host_lock, flags);
	hba->pm_qos.active_reqs++;
	if (hba->pm_qos.is_suspended)
		goto out;
start:
	switch (hba->pm_qos.state) {
	case PM_QOS_VOTED:
		/* nothing to do */
		break;
	case PM_QOS_REQ_UNVOTE:
		/*
		 * Fall-through - unvoting is either running or completed,
		 * so need to perform voting.
		 */
	case PM_QOS_UNVOTED:
		scsi_block_requests(hba->host);
		hba->pm_qos.state = PM_QOS_REQ_VOTE;
		schedule_work(&hba->pm_qos.vote_work);
		/* fall-through */
	case PM_QOS_REQ_VOTE:
		if (async) {
			hba->pm_qos.active_reqs--;
			ret = -EAGAIN;
			break;
		}
		/* Drop the lock while waiting, then re-evaluate the state */
		spin_unlock_irqrestore(hba->host->host_lock, flags);
		flush_work(&hba->pm_qos.vote_work);
		spin_lock_irqsave(hba->host->host_lock, flags);
		goto start;
	default:
		dev_err(hba->dev, "%s: PM QoS invalid state %d\n", __func__,
			hba->pm_qos.state);
		ret = -EINVAL;
		break;
	}
out:
	spin_unlock_irqrestore(hba->host->host_lock, flags);
	return ret;
}

/*
 * Drop one active request; when it was the last one and @no_sched is false,
 * mark the state as awaiting unvote and schedule the unvote work.
 *
 * Host lock is assumed to be held by caller
 */
static void __ufshcd_pm_qos_release(struct ufs_hba *hba, bool no_sched)
{
	if (!hba->pm_qos.cpu_dma_latency_us)
		return;

	if (--hba->pm_qos.active_reqs || no_sched)
		return;

	hba->pm_qos.state = PM_QOS_REQ_UNVOTE;
	schedule_work(&hba->pm_qos.unvote_work);
}

/* Locked wrapper around __ufshcd_pm_qos_release(); takes the host lock itself */
static void ufshcd_pm_qos_release(struct ufs_hba *hba, bool no_sched)
{
	unsigned long flags;

	spin_lock_irqsave(hba->host->host_lock, flags);
	__ufshcd_pm_qos_release(hba, no_sched);
	spin_unlock_irqrestore(hba->host->host_lock, flags);
}

/*
 * Work handler that applies the PM QoS vote: cancels any unvote work, updates
 * the request to the configured latency, marks the state as voted and
 * unblocks SCSI requests.
 */
static void ufshcd_pm_qos_vote_work(struct work_struct *work)
{
	struct ufshcd_pm_qos *ufs_pm_qos =
		container_of(work, struct ufshcd_pm_qos, vote_work);
	struct ufs_hba *hba = container_of(ufs_pm_qos, struct ufs_hba, pm_qos);
	unsigned long flags;

	/*
	 * un-voting work might be running when a new request arrives
	 * and causes voting work to schedule. To prevent race condition
	 * make sure the un-voting is finished.
	 */
	cancel_work_sync(&hba->pm_qos.unvote_work);

	pm_qos_update_request(&hba->pm_qos.req,
		hba->pm_qos.cpu_dma_latency_us);

	spin_lock_irqsave(hba->host->host_lock, flags);
	hba->pm_qos.state = PM_QOS_VOTED;
	spin_unlock_irqrestore(hba->host->host_lock, flags);

	scsi_unblock_requests(hba->host);
}

/*
 * Work handler that releases the PM QoS vote, unless requests became active
 * again since the work was scheduled.
 */
static void ufshcd_pm_qos_unvote_work(struct work_struct *work)
{
	struct ufshcd_pm_qos *ufs_pm_qos =
		container_of(work, struct ufshcd_pm_qos, unvote_work);
	struct ufs_hba *hba = container_of(ufs_pm_qos, struct ufs_hba, pm_qos);
	unsigned long flags;

	/*
	 * Check if new requests were submitted in the meantime and do not
	 * unvote if so.
	 */
	spin_lock_irqsave(hba->host->host_lock, flags);
	if (hba->pm_qos.active_reqs) {
		spin_unlock_irqrestore(hba->host->host_lock, flags);
		return;
	}
	spin_unlock_irqrestore(hba->host->host_lock, flags);

	/*
	 * When PM QoS voting is suspended (clocks scaled down or PM suspend
	 * taking place) we can un-vote immediately. Otherwise, un-voting is
	 * best done a bit later to accommodate for a burst of new upcoming
	 * requests.
	 */
	if (hba->pm_qos.is_suspended)
		pm_qos_update_request(&hba->pm_qos.req, PM_QOS_DEFAULT_VALUE);
	else
		pm_qos_update_request_timeout(&hba->pm_qos.req,
			PM_QOS_DEFAULT_VALUE, UFSHCD_PM_QOS_UNVOTE_TIMEOUT_US);

	/* The state is updated only after the request update was issued */
	spin_lock_irqsave(hba->host->host_lock, flags);
	hba->pm_qos.state = PM_QOS_UNVOTED;
	spin_unlock_irqrestore(hba->host->host_lock, flags);
}

/*
 * Add the PM QoS request (with the configured latency, or the default value
 * when none is configured) and initialize the voting state and work items.
 * Always returns 0.
 */
static int ufshcd_pm_qos_init(struct ufs_hba *hba)
{
	if (hba->pm_qos.cpu_dma_latency_us)
		pm_qos_add_request(&hba->pm_qos.req,
			PM_QOS_CPU_DMA_LATENCY, hba->pm_qos.cpu_dma_latency_us);
	else
		pm_qos_add_request(&hba->pm_qos.req,
			PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE);
	/* The state starts out as voted */
	hba->pm_qos.state = PM_QOS_VOTED;
	hba->pm_qos.active_reqs = 0;
	hba->pm_qos.is_suspended = false;
	INIT_WORK(&hba->pm_qos.vote_work, ufshcd_pm_qos_vote_work);
	INIT_WORK(&hba->pm_qos.unvote_work, ufshcd_pm_qos_unvote_work);

	return 0;
}

/* Remove the PM QoS request added by ufshcd_pm_qos_init() */
static void ufshcd_pm_qos_remove(struct ufs_hba *hba)
{
	pm_qos_remove_request(&hba->pm_qos.req);
}

#endif /* CONFIG_SMP */

/*
 * Take the ufshcd, PM QoS and hibern8 holds, each in its synchronous
 * (async == false) form.
 */
static void ufshcd_hold_all(struct ufs_hba *hba)
{
	ufshcd_hold(hba, false);
	ufshcd_pm_qos_hold(hba, false);
	ufshcd_hibern8_hold(hba, false);
}

/* Release the holds taken by ufshcd_hold_all(), in reverse order */
static void ufshcd_release_all(struct ufs_hba *hba)
{
	ufshcd_hibern8_release(hba, false);
	ufshcd_pm_qos_release(hba, false);
	ufshcd_release(hba, false);
}

@@ -2537,23 +2376,18 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
	}
	WARN_ON(hba->clk_gating.state != CLKS_ON);

	err = ufshcd_pm_qos_hold(hba, true);
	if (err) {
		err = SCSI_MLQUEUE_HOST_BUSY;
		clear_bit_unlock(tag, &hba->lrb_in_use);
		ufshcd_release(hba, true);
		goto out;
	}

	err = ufshcd_hibern8_hold(hba, true);
	if (err) {
		clear_bit_unlock(tag, &hba->lrb_in_use);
		err = SCSI_MLQUEUE_HOST_BUSY;
		ufshcd_pm_qos_release(hba, true);
		ufshcd_release(hba, true);
		goto out;
	}
	WARN_ON(hba->hibern8_on_idle.state != HIBERN8_EXITED);

	/* Vote PM QoS for the request */
	ufshcd_vops_pm_qos_req_start(hba, cmd->request);

	lrbp = &hba->lrb[tag];

	WARN_ON(lrbp->cmd);
@@ -2573,6 +2407,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
		lrbp->cmd = NULL;
		clear_bit_unlock(tag, &hba->lrb_in_use);
		ufshcd_release_all(hba);
		ufshcd_vops_pm_qos_req_end(hba, cmd->request, true);
		goto out;
	}

@@ -2588,6 +2423,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
		lrbp->cmd = NULL;
		clear_bit_unlock(tag, &hba->lrb_in_use);
		ufshcd_release_all(hba);
		ufshcd_vops_pm_qos_req_end(hba, cmd->request, true);
		dev_err(hba->dev, "%s: failed sending command, %d\n",
							__func__, err);
		err = DID_ERROR;
@@ -4957,8 +4793,10 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
			/* Do not touch lrbp after scsi done */
			cmd->scsi_done(cmd);
			__ufshcd_release(hba, false);
			__ufshcd_pm_qos_release(hba, false);
			__ufshcd_hibern8_release(hba, false);
			if (cmd->request)
				ufshcd_vops_pm_qos_req_end(hba, cmd->request,
					false);
		} else if (lrbp->command_type == UTP_CMD_TYPE_DEV_MANAGE) {
			if (hba->dev_cmd.complete) {
				ufshcd_cond_add_cmd_trace(hba, index,
@@ -7722,7 +7560,6 @@ static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
	enum ufs_pm_level pm_lvl;
	enum ufs_dev_pwr_mode req_dev_pwr_mode;
	enum uic_link_state req_link_state;
	unsigned long flags;

	hba->pm_op_in_progress = 1;
	if (!ufshcd_is_shutdown_pm(pm_op)) {
@@ -7745,13 +7582,6 @@ static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
	hba->clk_gating.is_suspended = true;
	hba->hibern8_on_idle.is_suspended = true;

	/* While entering PM suspend release the PM QoS vote and suspend it */
	if (hba->pm_qos.state == PM_QOS_VOTED) {
		pm_qos_update_request(&hba->pm_qos.req, PM_QOS_DEFAULT_VALUE);
		hba->pm_qos.state = PM_QOS_UNVOTED;
	}
	hba->pm_qos.is_suspended = true;

	ufshcd_suspend_clkscaling(hba);

	if (req_dev_pwr_mode == UFS_ACTIVE_PWR_MODE &&
@@ -7853,9 +7683,6 @@ enable_gating:
	ufshcd_resume_clkscaling(hba);
	hba->hibern8_on_idle.is_suspended = false;
	hba->clk_gating.is_suspended = false;
	spin_lock_irqsave(hba->host->host_lock, flags);
	hba->pm_qos.is_suspended = false;
	spin_unlock_irqrestore(hba->host->host_lock, flags);
	ufshcd_release_all(hba);
out:
	hba->pm_op_in_progress = 0;
@@ -7880,7 +7707,6 @@ static int ufshcd_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
{
	int ret;
	enum uic_link_state old_link_state;
	unsigned long flags;

	hba->pm_op_in_progress = 1;
	old_link_state = hba->uic_link_state;
@@ -7947,10 +7773,6 @@ static int ufshcd_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
	hba->clk_gating.is_suspended = false;
	hba->hibern8_on_idle.is_suspended = false;

	spin_lock_irqsave(hba->host->host_lock, flags);
	hba->pm_qos.is_suspended = false;
	spin_unlock_irqrestore(hba->host->host_lock, flags);

	if (hba->clk_scaling.is_allowed)
		ufshcd_resume_clkscaling(hba);

@@ -8634,13 +8456,8 @@ static int ufshcd_devfreq_target(struct device *dev,

	ret = ufshcd_devfreq_scale(hba, scale_up);

	spin_lock_irqsave(hba->host->host_lock, irq_flags);
	/* suspend PM QoS voting when scaled down and vise versa */
	hba->pm_qos.is_suspended = !scale_up;

	if (release_clk_hold)
		__ufshcd_release(hba, false);
	spin_unlock_irqrestore(hba->host->host_lock, irq_flags);
		ufshcd_release(hba, false);

	trace_ufshcd_profile_clk_scaling(dev_name(hba->dev),
		(scale_up ? "up" : "down"),
@@ -8790,13 +8607,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
		goto out_disable;
	}

	/* Configure PM_QOS latency */
	err = ufshcd_pm_qos_init(hba);
	if (err) {
		dev_err(hba->dev, "ufshcd_pm_qos_init failed, err=%d\n", err);
		goto exit_gating;
	}

	/* Configure LRB */
	ufshcd_host_memory_configure(hba);

@@ -8851,7 +8661,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
	err = devm_request_irq(dev, irq, ufshcd_intr, IRQF_SHARED, UFSHCD, hba);
	if (err) {
		dev_err(hba->dev, "request irq failed\n");
		goto pm_qos_remove;
		goto exit_gating;
	} else {
		hba->is_irq_enabled = true;
	}
@@ -8860,13 +8670,13 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
	err = scsi_init_shared_tag_map(host, host->can_queue);
	if (err) {
		dev_err(hba->dev, "init shared queue failed\n");
		goto pm_qos_remove;
		goto exit_gating;
	}

	err = scsi_add_host(host, hba->dev);
	if (err) {
		dev_err(hba->dev, "scsi_add_host failed\n");
		goto pm_qos_remove;
		goto exit_gating;
	}

	/* Host controller enable */
@@ -8925,8 +8735,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)

out_remove_scsi_host:
	scsi_remove_host(hba->host);
pm_qos_remove:
	ufshcd_pm_qos_remove(hba);
exit_gating:
	ufshcd_exit_clk_gating(hba);
out_disable:
Loading