Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit b71968b4 authored by Praneeth Paladugu's avatar Praneeth Paladugu Committed by Gerrit - the friendly Code Review server
Browse files

msm: vidc: Update Venus HW clock scaling functionality



The current clock scaling code does not consider bit rate or
encoder/decoder concurrencies. Hence modify it to vote for the optimal
clock by considering all the parameters that can affect the clock.

CRs-Fixed: 2012520
Change-Id: I332b3d01f0e9c89df2eb73a1b6833ec70219d606
Signed-off-by: default avatarPraneeth Paladugu <ppaladug@codeaurora.org>
parent e060933e
Loading
Loading
Loading
Loading
+19 −27
Original line number Diff line number Diff line
@@ -16,25 +16,6 @@ Optional properties:
- interrupts : should contain the vidc interrupt.
- qcom,platform-version : mask and shift of the platform version bits
    in efuse register.
- qcom,load-freq-tbl : load (in macroblocks/sec) and corresponding vcodec
  clock required along with codec's config, which is a bitmap that describes
  what the clock is used for. The bitmaps are as follows:
    supports mvc encoder = 0x00000001
    supports mvc decoder = 0x00000003
    supports h264 encoder = 0x00000004
    supports h264 decoder = 0x0000000c
    supports mpeg1 encoder = 0x00000040
    supports mpeg1 decoder = 0x000000c0
    supports mpeg2 encoder = 0x00000100
    supports mpeg2 decoder = 0x00000300
    supports vp6 encoder = 0x00100000
    supports vp6 decoder = 0x00300000
    supports vp7 encoder = 0x00400000
    supports vp7 decoder = 0x00c00000
    supports vp8 encoder = 0x01000000
    supports vp8 decoder = 0x03000000
    supports hevc encoder = 0x04000000
    supports hevc decoder = 0x0c000000
- qcom,reg-presets : list of offset-value pairs for registers to be written.
  The offsets are from the base offset specified in 'reg'. This is mainly
  used for QoS, VBIF, etc. presets for video.
@@ -57,9 +38,26 @@ Optional properties:
- qcom,clock-freq-tbl = node containing individual domain nodes, each with:
     - qcom,codec-mask: a bitmap of supported codec types; every two bits
       represent a codec type.
         supports mvc encoder = 0x00000001
         supports mvc decoder = 0x00000003
         supports h264 encoder = 0x00000004
         supports h264 decoder = 0x0000000c
         supports mpeg1 encoder = 0x00000040
         supports mpeg1 decoder = 0x000000c0
         supports mpeg2 encoder = 0x00000100
         supports mpeg2 decoder = 0x00000300
         supports vp6 encoder = 0x00100000
         supports vp6 decoder = 0x00300000
         supports vp7 encoder = 0x00400000
         supports vp7 decoder = 0x00c00000
         supports vp8 encoder = 0x01000000
         supports vp8 decoder = 0x03000000
         supports hevc encoder = 0x04000000
         supports hevc decoder = 0x0c000000
     - qcom,cycles-per-mb: number of cycles required to process each macro
       block.
     - qcom,low-power-mode-factor: the factor which needs to be multiplied with
     - qcom,low-power-cycles-per-mb: number of cycles required to process each
       macro block in low power mode.
       the required frequency to get the final frequency, the factor is
       represented in Q16 format.
- qcom,sw-power-collapse = A bool indicating if video hardware core can be
@@ -167,13 +165,6 @@ Example:
		venus-supply = <&gdsc>;
		venus-core0-supply = <&gdsc1>;
		venus-core1-supply = <&gdsc2>;
		qcom,load-freq-tbl =
			<489600 266670000 0x030fcfff>, /* Legacy decoder 1080p 60fps  */
			<108000 133330000 0x030fcfff>, /* Legacy decoder 720p 30fps   */
			<108000 200000000 0x01000414>, /* Legacy encoder 720p 30fps   */
			<72000 133330000 0x0c000000>, /* HEVC decoder VGA 60fps   */
			<36000 133330000 0x0c000000>, /* HEVC VGA 30 fps  */
			<36000 133330000 0x01000414>; /* Legacy encoder VGA 30 fps   */
		qcom,hfi-version = "3xx";
		qcom,reg-presets = <0x80004 0x1>,
			<0x80178 0x00001FFF>;
@@ -190,6 +181,7 @@ Example:
		qcom,use-non-secure-pil;
		qcom,use_dynamic_bw_update;
		qcom,fw-bias = <0xe000000>;
		qcom,allowed-clock-rates = <200000000 300000000 400000000>;
		msm_vidc_cb1: msm_vidc_cb1 {
			compatible = "qcom,msm-vidc,context-bank";
			label = "venus_ns";
+114 −107
Original line number Diff line number Diff line
@@ -41,14 +41,21 @@
		qcom,proxy-clock-names = "core_clk", "iface_clk",
			"bus_clk", "core0_clk", "core1_clk";
		qcom,clock-configs = <0x0 0x0 0x0 0x0 0x0>;

		  qcom,load-freq-tbl =
			  /* Encoders */
			  <1944000 444000000 0x55555555>, /* 4k UHD @ 60 */
			  < 244800 200000000 0x55555555>, /* 720p @ 30 */
			  /* Decoders */
			  <1944000 444000000 0xffffffff>, /* 4k UHD @ 60 */
			  < 244800 200000000 0xffffffff>; /* 1080p @ 30 */
		qcom,allowed-clock-rates = <200000000 320000000 380000000
			444000000 533000000>;
		qcom,clock-freq-tbl {
			qcom,profile-enc {
				qcom,codec-mask = <0x55555555>;
				qcom,vpp-cycles-per-mb = <675>;
				qcom,vsp-cycles-per-mb = <125>;
				qcom,low-power-cycles-per-mb = <320>;
			};
			qcom,profile-dec {
				qcom,codec-mask = <0xffffffff>;
				qcom,vpp-cycles-per-mb = <200>;
				qcom,vsp-cycles-per-mb = <50>;
			};
		};

		/* Buses */
		bus_cnoc {
@@ -59,6 +66,7 @@
			qcom,bus-governor = "performance";
			qcom,bus-range-kbps = <1000 1000>;
		};

		venus_bus_ddr {
			compatible = "qcom,msm-vidc,bus";
			label = "venus-ddr";
@@ -130,6 +138,5 @@
			virtual-addr-pool = <0x1000000 0x24800000>;
			qcom,secure-context-bank;
		};

	};
};
+1 −0
Original line number Diff line number Diff line
@@ -1447,6 +1447,7 @@ int msm_venc_s_ctrl(struct msm_vidc_inst *inst, struct v4l2_ctrl *ctrl)
		bitrate.bit_rate = ctrl->val;
		bitrate.layer_id = 0;
		pdata = &bitrate;
		inst->bitrate = ctrl->val;
		break;
	}
	case V4L2_CID_MPEG_VIDEO_BITRATE_PEAK:
+5 −1
Original line number Diff line number Diff line
@@ -1911,6 +1911,7 @@ void *msm_vidc_open(int core_id, int session_type)

	INIT_MSM_VIDC_LIST(&inst->pendingq);
	INIT_MSM_VIDC_LIST(&inst->scratchbufs);
	INIT_MSM_VIDC_LIST(&inst->freqs);
	INIT_MSM_VIDC_LIST(&inst->persistbufs);
	INIT_MSM_VIDC_LIST(&inst->pending_getpropq);
	INIT_MSM_VIDC_LIST(&inst->outputbufs);
@@ -1921,8 +1922,9 @@ void *msm_vidc_open(int core_id, int session_type)
	inst->session_type = session_type;
	inst->state = MSM_VIDC_CORE_UNINIT_DONE;
	inst->core = core;
	inst->freq = 0;
	inst->bit_depth = MSM_VIDC_BIT_DEPTH_8;
	inst->instant_bitrate = 0;
	inst->bitrate = 0;
	inst->pic_struct = MSM_VIDC_PIC_STRUCT_PROGRESSIVE;
	inst->colour_space = MSM_VIDC_BT601_6_525;
	inst->profile = V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE;
@@ -2037,6 +2039,8 @@ static void cleanup_instance(struct msm_vidc_inst *inst)
		}
		mutex_unlock(&inst->pendingq.lock);

		msm_comm_free_freq_table(inst);

		if (msm_comm_release_scratch_buffers(inst, false)) {
			dprintk(VIDC_ERR,
				"Failed to release scratch buffers\n");
+346 −12
Original line number Diff line number Diff line
@@ -23,15 +23,153 @@ static bool msm_dcvs_check_supported(struct msm_vidc_inst *inst);
static int msm_dcvs_enc_scale_clocks(struct msm_vidc_inst *inst);
static int msm_dcvs_dec_scale_clocks(struct msm_vidc_inst *inst, bool fbd);

int msm_dcvs_try_enable(struct msm_vidc_inst *inst)
/*
 * msm_comm_vote_bus() - collect per-instance bus vote data and submit it.
 * @core: video core whose instance list is walked
 *
 * Counts the instances on core->instances, allocates one vidc_bus_vote_data
 * slot per instance, fills each slot (domain, codec, width, height, fps,
 * num_formats) and hands the array to the vote_bus HFI op. The instance list
 * is counted and walked under core->lock; the HFI call is made after the
 * lock is dropped.
 *
 * Return: 0 on success, -EINVAL when @core or core->device is NULL, -ENOMEM
 * when the vote array cannot be allocated, otherwise the vote_bus result.
 */
int msm_comm_vote_bus(struct msm_vidc_core *core)
{
	/*
	 * NOTE(review): the two lines below reference `inst` before it is
	 * declared and match the body of msm_dcvs_try_enable(); they look
	 * like residue of that function rather than part of this one -
	 * confirm against the actual source file.
	 */
	if (!inst) {
		dprintk(VIDC_ERR, "%s: Invalid args: %p\n", __func__, inst);
	int rc = 0, vote_data_count = 0, i = 0;
	struct hfi_device *hdev;
	struct msm_vidc_inst *inst = NULL;
	struct vidc_bus_vote_data *vote_data = NULL;

	if (!core) {
		dprintk(VIDC_ERR, "%s Invalid args: %pK\n", __func__, core);
		return -EINVAL;
	}
	/*
	 * NOTE(review): the next two statements also match
	 * msm_dcvs_try_enable(); as written they would return before any
	 * voting happens - presumably residue as well, confirm.
	 */
	inst->dcvs_mode = msm_dcvs_check_supported(inst);
	return 0;

	hdev = core->device;
	if (!hdev) {
		dprintk(VIDC_ERR, "%s Invalid device handle: %pK\n",
				__func__, hdev);
		return -EINVAL;
	}

	/* First pass: count instances so the vote array can be sized. */
	mutex_lock(&core->lock);
	list_for_each_entry(inst, &core->instances, list)
		++vote_data_count;

	vote_data = kcalloc(vote_data_count, sizeof(*vote_data),
			GFP_TEMPORARY);
	if (!vote_data) {
		dprintk(VIDC_ERR, "%s: failed to allocate memory\n", __func__);
		rc = -ENOMEM;
		goto fail_alloc;
	}

	/* Second pass (same lock hold): fill one vote entry per instance. */
	list_for_each_entry(inst, &core->instances, list) {
		int codec = 0, yuv = 0;

		/*
		 * For a decoder the codec fourcc is taken from OUTPUT_PORT and
		 * the yuv fourcc from CAPTURE_PORT; any other session type uses
		 * the opposite ports.
		 */
		codec = inst->session_type == MSM_VIDC_DECODER ?
			inst->fmts[OUTPUT_PORT].fourcc :
			inst->fmts[CAPTURE_PORT].fourcc;

		/* yuv is only referenced by the commented-out line below. */
		yuv = inst->session_type == MSM_VIDC_DECODER ?
			inst->fmts[CAPTURE_PORT].fourcc :
			inst->fmts[OUTPUT_PORT].fourcc;

		vote_data[i].domain = get_hal_domain(inst->session_type);
		vote_data[i].codec = get_hal_codec(codec);
		/* Vote for the larger of the two ports' dimensions. */
		vote_data[i].width =  max(inst->prop.width[CAPTURE_PORT],
				inst->prop.width[OUTPUT_PORT]);
		vote_data[i].height = max(inst->prop.height[CAPTURE_PORT],
				inst->prop.height[OUTPUT_PORT]);

		/*
		 * A non-zero operating_rate overrides prop.fps; its upper bits
		 * (>> 16) are used, clamped to at least 1. Presumably
		 * operating_rate is a Q16 value - TODO confirm.
		 */
		if (inst->operating_rate)
			vote_data[i].fps = (inst->operating_rate >> 16) ?
				inst->operating_rate >> 16 : 1;
		else
			vote_data[i].fps = inst->prop.fps;

		/*
		 * TODO: support for OBP-DBP split mode hasn't been yet
		 * implemented, once it is, this part of code needs to be
		 * revisited since passing in accurate information to the bus
		 * governor will drastically reduce bandwidth
		 */
		//vote_data[i].color_formats[0] = get_hal_uncompressed(yuv);
		vote_data[i].num_formats = 1;
		i++;
	}
	mutex_unlock(&core->lock);

	/* Submit the collected votes; the array is freed regardless of rc. */
	rc = call_hfi_op(hdev, vote_bus, hdev->hfi_device_data, vote_data,
			vote_data_count);
	if (rc)
		dprintk(VIDC_ERR, "Failed to scale bus: %d\n", rc);

	kfree(vote_data);
	return rc;

fail_alloc:
	/* Allocation failed while core->lock was held; release it. */
	mutex_unlock(&core->lock);
	return rc;
}

/*
 * msm_vidc_update_freq_entry() - record the frequency computed for a buffer.
 * @inst: instance whose freqs list is updated
 * @freq: frequency value to store
 * @device_addr: buffer address used as the lookup key
 *
 * Overwrites the freq of the first entry whose device_addr matches. When no
 * entry matches, a new one is allocated and appended to inst->freqs.list.
 * The list is searched and modified under inst->freqs.lock.
 *
 * If the allocation fails the list is left unchanged and an error is logged.
 */
static void msm_vidc_update_freq_entry(struct msm_vidc_inst *inst,
	unsigned long freq, ion_phys_addr_t device_addr)
{
	struct vidc_freq_data *temp;
	bool found = false;

	mutex_lock(&inst->freqs.lock);
	/* Nothing is removed while iterating, so the plain iterator suffices. */
	list_for_each_entry(temp, &inst->freqs.list, list) {
		if (temp->device_addr == device_addr) {
			temp->freq = freq;
			found = true;
			break;
		}
	}

	if (!found) {
		temp = kzalloc(sizeof(*temp), GFP_KERNEL);
		if (!temp) {
			/* Never dereference a failed allocation. */
			dprintk(VIDC_ERR, "%s: failed to allocate memory\n",
				__func__);
			goto unlock;
		}
		temp->freq = freq;
		temp->device_addr = device_addr;
		list_add_tail(&temp->list, &inst->freqs.list);
	}

unlock:
	mutex_unlock(&inst->freqs.lock);
}

// TODO this needs to be removed later and use queued_list

/*
 * msm_vidc_clear_freq_entry() - zero the stored frequency for a buffer.
 * @inst: instance whose freqs list is searched
 * @device_addr: buffer address identifying the entries to clear
 *
 * Every entry on inst->freqs.list whose device_addr matches gets its freq
 * reset to 0. Entries stay on the list. Runs under inst->freqs.lock.
 */
void msm_vidc_clear_freq_entry(struct msm_vidc_inst *inst,
	ion_phys_addr_t device_addr)
{
	struct vidc_freq_data *entry, *tmp;

	mutex_lock(&inst->freqs.lock);
	list_for_each_entry_safe(entry, tmp, &inst->freqs.list, list) {
		if (entry->device_addr != device_addr)
			continue;

		entry->freq = 0;
	}
	mutex_unlock(&inst->freqs.lock);
}


/*
 * msm_vidc_get_highest_freq() - largest frequency recorded for an instance.
 * @inst: instance whose freqs list is scanned
 *
 * Scans inst->freqs.list under inst->freqs.lock.
 *
 * Return: the maximum freq among all entries, or 0 when the list is empty.
 */
static unsigned long msm_vidc_get_highest_freq(struct msm_vidc_inst *inst)
{
	struct vidc_freq_data *entry;
	unsigned long highest = 0;

	mutex_lock(&inst->freqs.lock);
	list_for_each_entry(entry, &inst->freqs.list, list) {
		if (entry->freq > highest)
			highest = entry->freq;
	}
	mutex_unlock(&inst->freqs.lock);

	return highest;
}

/*
 * msm_comm_free_freq_table() - release every entry on the instance freq list.
 * @inst: instance whose freqs list is emptied
 *
 * Unlinks and frees each vidc_freq_data entry, then re-initialises the list
 * head. All of this happens under inst->freqs.lock.
 */
void msm_comm_free_freq_table(struct msm_vidc_inst *inst)
{
	struct vidc_freq_data *entry, *tmp;

	mutex_lock(&inst->freqs.lock);

	/* The _safe iterator is required: entries are freed while walking. */
	list_for_each_entry_safe(entry, tmp, &inst->freqs.list, list) {
		list_del(&entry->list);
		kfree(entry);
	}

	INIT_LIST_HEAD(&inst->freqs.list);

	mutex_unlock(&inst->freqs.lock);
}


static inline int msm_dcvs_get_mbs_per_frame(struct msm_vidc_inst *inst)
{
@@ -50,6 +188,168 @@ static inline int msm_dcvs_get_mbs_per_frame(struct msm_vidc_inst *inst)
	return NUM_MBS_PER_FRAME(height, width);
}

/*
 * msm_vidc_calc_freq() - estimate the clock needed by one instance.
 * @inst: instance to evaluate; inst->entry supplies the cycles-per-MB values
 * @filled_len: number of filled bytes in the input buffer (decoder only)
 *
 * Computes a VPP cycle count (macroblocks * cycles per macroblock) and a VSP
 * cycle count (macroblocks * cycles per macroblock plus a bit-rate term
 * scaled by a 10/7 overhead factor), and returns the larger of the two.
 *
 * For encoders the VPP cycles-per-MB value is the low-power one when
 * VIDC_LOW_POWER is set in inst->flags, and the bit-rate term comes from
 * inst->bitrate. For decoders the bit-rate term is derived from
 * inst->prop.fps and @filled_len.
 *
 * NOTE(review): the macroblock count used here is per frame, not per second;
 * confirm whether a per-second load was intended.
 *
 * Return: the estimated frequency, or 0 for an unknown session type.
 */
static unsigned long msm_vidc_calc_freq(struct msm_vidc_inst *inst,
	u32 filled_len)
{
	unsigned long vpp_cycles = 0, vsp_cycles = 0;
	u32 vpp_cycles_per_mb;
	u32 mbs_per_frame;

	mbs_per_frame = msm_dcvs_get_mbs_per_frame(inst);

	/*
	 * Calculate vpp, vsp cycles separately for encoder and decoder.
	 * Even though, most part is common now, in future it may change
	 * between them.
	 */

	if (inst->session_type == MSM_VIDC_ENCODER) {
		vpp_cycles_per_mb = inst->flags & VIDC_LOW_POWER ?
			inst->entry->low_power_cycles :
			inst->entry->vpp_cycles;

		/*
		 * Previously vpp_cycles_per_mb was computed but never applied,
		 * leaving vpp_cycles at 0 for every encoder session.
		 */
		vpp_cycles = mbs_per_frame * vpp_cycles_per_mb;

		vsp_cycles = mbs_per_frame * inst->entry->vsp_cycles;

		/* 10 / 7 is overhead factor */
		vsp_cycles += (inst->bitrate * 10) / 7;
	} else if (inst->session_type == MSM_VIDC_DECODER) {
		vpp_cycles = mbs_per_frame * inst->entry->vpp_cycles;

		vsp_cycles = mbs_per_frame * inst->entry->vsp_cycles;

		/*
		 * 10 / 7 is overhead factor. Widen before multiplying so the
		 * fps * bytes * 80 product is not truncated to 32 bits where
		 * unsigned long is wider than u32.
		 */
		vsp_cycles += ((unsigned long)inst->prop.fps *
				filled_len * 8 * 10) / 7;
	} else {
		// TODO return Min or Max ?
		dprintk(VIDC_ERR, "%s: Unknown session type = %d\n",
			__func__, inst->session_type);
		return 0;
	}

	return max(vpp_cycles, vsp_cycles);
}

static int msm_vidc_set_clocks(struct msm_vidc_core *core)
{
	struct hfi_device *hdev;
	unsigned long freq = 0, rate = 0;
	struct msm_vidc_inst *temp = NULL;
	int rc = 0, i = 0;
	struct allowed_clock_rates_table *allowed_clks_tbl = NULL;

	hdev = core->device;
	allowed_clks_tbl = core->resources.allowed_clks_tbl;
	if (!hdev || !allowed_clks_tbl) {
		dprintk(VIDC_ERR,
			"%s Invalid parameters\n", __func__);
		return -EINVAL;
	}

	mutex_lock(&core->lock);
	list_for_each_entry(temp, &core->instances, list) {
		freq += temp->freq;
	}
	for (i = core->resources.allowed_clks_tbl_size - 1; i >= 0; i--) {
		rate = allowed_clks_tbl[i].clock_rate;
		if (rate >= freq)
			break;
	}
	mutex_unlock(&core->lock);

	core->freq = rate;
	dprintk(VIDC_PROF, "Voting for freq = %lu", freq);
	rc = call_hfi_op(hdev, scale_clocks,
			hdev->hfi_device_data, rate);

	return rc;
}

/*
 * msm_vidc_max_freq() - rate stored in the first allowed-clock table entry.
 * @inst: instance whose core supplies the allowed clock table
 *
 * Return: allowed_clks_tbl[0].clock_rate. Presumably the table is ordered
 * with its highest rate first - TODO confirm against the table parser.
 */
static unsigned long msm_vidc_max_freq(struct msm_vidc_inst *inst)
{
	struct allowed_clock_rates_table *rates_tbl =
		inst->core->resources.allowed_clks_tbl;
	unsigned long top_rate = rates_tbl[0].clock_rate;

	dprintk(VIDC_PROF, "Max rate = %lu", top_rate);

	return top_rate;
}

int msm_comm_scale_clocks(struct msm_vidc_inst *inst)
{
	struct vb2_buf_entry *temp, *next;
	unsigned long freq = 0;
	u32 filled_len = 0;
	ion_phys_addr_t device_addr = 0;

	if (inst->count.fbd < DCVS_FTB_WINDOW) {
		freq = msm_vidc_max_freq(inst);
		goto decision_done;
	}

	mutex_lock(&inst->pendingq.lock);
	list_for_each_entry_safe(temp, next, &inst->pendingq.list, list) {
		if (temp->vb->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
			filled_len = max(filled_len,
				temp->vb->planes[0].bytesused);
			device_addr = temp->vb->planes[0].m.userptr;
		}
	}
	mutex_unlock(&inst->pendingq.lock);

	if (!filled_len || !device_addr) {
		freq = inst->freq;
		goto decision_done;
	}

	freq = msm_vidc_calc_freq(inst, filled_len);

	msm_vidc_update_freq_entry(inst, freq, device_addr);

	freq = msm_vidc_get_highest_freq(inst);

decision_done:
	inst->freq = freq;
	msm_vidc_set_clocks(inst->core);
	return 0;
}

/*
 * msm_comm_scale_clocks_and_bus() - refresh clock and bus votes for a session.
 * @inst: instance triggering the re-vote
 *
 * Calls msm_comm_scale_clocks() for the instance and msm_comm_vote_bus() for
 * its core. A failure of either step is logged as a warning and is not
 * propagated to the caller.
 *
 * Return: -EINVAL when @inst, its core or the core's device is NULL,
 * otherwise 0.
 */
int msm_comm_scale_clocks_and_bus(struct msm_vidc_inst *inst)
{
	if (!inst || !inst->core || !inst->core->device) {
		dprintk(VIDC_ERR, "%s Invalid params\n", __func__);
		return -EINVAL;
	}

	if (msm_comm_scale_clocks(inst)) {
		dprintk(VIDC_WARN,
			"Failed to scale clocks. Performance might be impacted\n");
	}
	if (msm_comm_vote_bus(inst->core)) {
		dprintk(VIDC_WARN,
			"Failed to scale DDR bus. Performance might be impacted\n");
	}
	return 0;
}

/*
 * msm_dcvs_try_enable() - cache whether DCVS is supported for an instance.
 * @inst: instance to evaluate
 *
 * Stores the result of msm_dcvs_check_supported() in inst->dcvs_mode.
 *
 * Return: 0 on success, -EINVAL when @inst is NULL.
 */
int msm_dcvs_try_enable(struct msm_vidc_inst *inst)
{
	if (inst) {
		inst->dcvs_mode = msm_dcvs_check_supported(inst);
		return 0;
	}

	dprintk(VIDC_ERR, "%s: Invalid args: %p\n", __func__, inst);
	return -EINVAL;
}

static inline int msm_dcvs_count_active_instances(struct msm_vidc_core *core,
	enum session_type session_type)
{
@@ -104,6 +404,44 @@ static bool msm_dcvs_check_codec_supported(int fourcc,
	return codec_type && session_type;
}

/*
 * msm_comm_init_clocks_and_bus_data() - bind an instance to a clock profile.
 * @inst: instance to configure
 *
 * Picks the codec fourcc (OUTPUT_PORT for a decoder, CAPTURE_PORT otherwise)
 * and walks the core's clock frequency table. The first profile entry that
 * msm_dcvs_check_codec_supported() accepts for this fourcc, codec mask and
 * session type is stored in inst->entry.
 *
 * Return: 0 when a matching entry was found, -EINVAL otherwise.
 */
int msm_comm_init_clocks_and_bus_data(struct msm_vidc_inst *inst)
{
	struct clock_freq_table *freq_tbl =
		&inst->core->resources.clock_freq_tbl;
	int codec_fourcc;
	int idx;

	if (inst->session_type == MSM_VIDC_DECODER)
		codec_fourcc = inst->fmts[OUTPUT_PORT].fourcc;
	else
		codec_fourcc = inst->fmts[CAPTURE_PORT].fourcc;

	for (idx = 0; idx < freq_tbl->count; idx++) {
		struct clock_profile_entry *candidate =
			&freq_tbl->clk_prof_entries[idx];

		if (msm_dcvs_check_codec_supported(codec_fourcc,
				candidate->codec_mask,
				inst->session_type)) {
			/* First match wins. */
			inst->entry = candidate;
			return 0;
		}
	}

	dprintk(VIDC_ERR,
		"Failed : No matching clock entry found\n");
	return -EINVAL;
}

static void msm_dcvs_update_dcvs_params(int idx, struct msm_vidc_inst *inst)
{
	struct dcvs_stats *dcvs = NULL;
@@ -439,8 +777,6 @@ static int msm_dcvs_enc_scale_clocks(struct msm_vidc_inst *inst)
			dcvs->prev_freq_lowered ? "Lower" : "Higher",
			dcvs->load, total_input_buf, fw_pending_bufs);

		rc = msm_comm_scale_clocks_load(core, dcvs->load,
				LOAD_CALC_NO_QUIRKS);
		if (rc) {
			dprintk(VIDC_PROF,
				"Failed to set clock rate in FBD: %d\n", rc);
@@ -519,8 +855,6 @@ static int msm_dcvs_dec_scale_clocks(struct msm_vidc_inst *inst, bool fbd)
			dcvs->load, total_output_buf, buffers_outside_fw,
			dcvs->threshold_disp_buf_high, dcvs->transition_turbo);

		rc = msm_comm_scale_clocks_load(core, dcvs->load,
				LOAD_CALC_NO_QUIRKS);
		if (rc) {
			dprintk(VIDC_ERR,
				"Failed to set clock rate in FBD: %d\n", rc);
@@ -624,7 +958,7 @@ static bool msm_dcvs_check_supported(struct msm_vidc_inst *inst)
	}
dcvs_decision_done:
	if (!is_dcvs_supported) {
		msm_comm_scale_clocks(core);
		msm_comm_scale_clocks(inst);
		if (instance_count > 1) {
			mutex_lock(&core->lock);
			list_for_each_entry(temp, &core->instances, list)
Loading