Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit cb928028 authored by Kyle Yan's avatar Kyle Yan Committed by Gerrit - the friendly Code Review server
Browse files

Merge "msm: vidc: Update Venus HW clock scaling functionality" into msm-4.9

parents 5841594b b71968b4
Loading
Loading
Loading
Loading
+19 −27
Original line number Diff line number Diff line
@@ -16,25 +16,6 @@ Optional properties:
- interrupts : should contain the vidc interrupt.
- qcom,platform-version : mask and shift of the platform version bits
    in efuse register.
- qcom,load-freq-tbl : load (in macroblocks/sec) and corresponding vcodec
  clock required along with codec's config, which is a bitmap that describes
  what the clock is used for. The bitmaps are as follows:
    supports mvc encoder = 0x00000001
    supports mvc decoder = 0x00000003
    supports h264 encoder = 0x00000004
    supports h264 decoder = 0x0000000c
    supports mpeg1 encoder = 0x00000040
    supports mpeg1 decoder = 0x000000c0
    supports mpeg2 encoder = 0x00000100
    supports mpeg2 decoder = 0x00000300
    supports vp6 encoder = 0x00100000
    supports vp6 decoder = 0x00300000
    supports vp7 encoder = 0x00400000
    supports vp7 decoder = 0x00c00000
    supports vp8 encoder = 0x01000000
    supports vp8 decoder = 0x03000000
    supports hevc encoder = 0x04000000
    supports hevc decoder = 0x0c000000
- qcom,reg-presets : list of offset-value pairs for registers to be written.
  The offsets are from the base offset specified in 'reg'. This is mainly
  used for QoS, VBIF, etc. presets for video.
@@ -57,9 +38,26 @@ Optional properties:
- qcom,clock-freq-tbl = node containing individual domain nodes, each with:
     - qcom,codec-mask: a bitmap of supported codec types; every two bits
       represent a codec type.
         supports mvc encoder = 0x00000001
         supports mvc decoder = 0x00000003
         supports h264 encoder = 0x00000004
         supports h264 decoder = 0x0000000c
         supports mpeg1 encoder = 0x00000040
         supports mpeg1 decoder = 0x000000c0
         supports mpeg2 encoder = 0x00000100
         supports mpeg2 decoder = 0x00000300
         supports vp6 encoder = 0x00100000
         supports vp6 decoder = 0x00300000
         supports vp7 encoder = 0x00400000
         supports vp7 decoder = 0x00c00000
         supports vp8 encoder = 0x01000000
         supports vp8 decoder = 0x03000000
         supports hevc encoder = 0x04000000
         supports hevc decoder = 0x0c000000
     - qcom,cycles-per-mb: number of cycles required to process each macro
       block.
     - qcom,low-power-mode-factor: the factor which needs to be multiple with
     - qcom,low-power-cycles-per-mb: number of cycles required to process each
       macro block in low power mode.
       the required frequency to get the final frequency, the factor is
       represented in Q16 format.
- qcom,sw-power-collapse = A bool indicating if video hardware core can be
@@ -167,13 +165,6 @@ Example:
		venus-supply = <&gdsc>;
		venus-core0-supply = <&gdsc1>;
		venus-core1-supply = <&gdsc2>;
		qcom,load-freq-tbl =
			<489600 266670000 0x030fcfff>, /* Legacy decoder 1080p 60fps  */
			<108000 133330000 0x030fcfff>, /* Legacy decoder 720p 30fps   */
			<108000 200000000 0x01000414>, /* Legacy encoder 720p 30fps   */
			<72000 133330000 0x0c000000>, /* HEVC decoder VGA 60fps   */
			<36000 133330000 0x0c000000>, /* HEVC VGA 30 fps  */
			<36000 133330000 0x01000414>; /* Legacy encoder VGA 30 fps   */
		qcom,hfi-version = "3xx";
		qcom,reg-presets = <0x80004 0x1>,
			<0x80178 0x00001FFF>;
@@ -190,6 +181,7 @@ Example:
		qcom,use-non-secure-pil;
		qcom,use_dynamic_bw_update;
		qcom,fw-bias = <0xe000000>;
		qcom,allowed-clock-rates = <200000000 300000000 400000000>;
		msm_vidc_cb1: msm_vidc_cb1 {
			compatible = "qcom,msm-vidc,context-bank";
			label = "venus_ns";
+114 −107
Original line number Diff line number Diff line
@@ -41,14 +41,21 @@
		qcom,proxy-clock-names = "core_clk", "iface_clk",
			"bus_clk", "core0_clk", "core1_clk";
		qcom,clock-configs = <0x0 0x0 0x0 0x0 0x0>;

		  qcom,load-freq-tbl =
			  /* Encoders */
			  <1944000 444000000 0x55555555>, /* 4k UHD @ 60 */
			  < 244800 200000000 0x55555555>, /* 720p @ 30 */
			  /* Decoders */
			  <1944000 444000000 0xffffffff>, /* 4k UHD @ 60 */
			  < 244800 200000000 0xffffffff>; /* 1080p @ 30 */
		qcom,allowed-clock-rates = <200000000 320000000 380000000
			444000000 533000000>;
		/*
		 * Per-profile cycle costs. Each child node carries a
		 * qcom,codec-mask plus cycles-per-macroblock figures.
		 */
		qcom,clock-freq-tbl {
			/*
			 * 0x55555555 sets the low bit of every two-bit codec
			 * field, which the binding's codec-mask table lists as
			 * the encoder value for each codec.
			 */
			qcom,profile-enc {
				qcom,codec-mask = <0x55555555>;
				qcom,vpp-cycles-per-mb = <675>;
				qcom,vsp-cycles-per-mb = <125>;
				qcom,low-power-cycles-per-mb = <320>;
			};
			/*
			 * 0xffffffff sets both bits of every two-bit codec
			 * field, which the binding's codec-mask table lists as
			 * the decoder value for each codec. No low-power cycle
			 * count is given for this profile.
			 */
			qcom,profile-dec {
				qcom,codec-mask = <0xffffffff>;
				qcom,vpp-cycles-per-mb = <200>;
				qcom,vsp-cycles-per-mb = <50>;
			};
		};

		/* Buses */
		bus_cnoc {
@@ -59,6 +66,7 @@
			qcom,bus-governor = "performance";
			qcom,bus-range-kbps = <1000 1000>;
		};

		venus_bus_ddr {
			compatible = "qcom,msm-vidc,bus";
			label = "venus-ddr";
@@ -130,6 +138,5 @@
			virtual-addr-pool = <0x1000000 0x24800000>;
			qcom,secure-context-bank;
		};

	};
};
+1 −0
Original line number Diff line number Diff line
@@ -1447,6 +1447,7 @@ int msm_venc_s_ctrl(struct msm_vidc_inst *inst, struct v4l2_ctrl *ctrl)
		bitrate.bit_rate = ctrl->val;
		bitrate.layer_id = 0;
		pdata = &bitrate;
		inst->bitrate = ctrl->val;
		break;
	}
	case V4L2_CID_MPEG_VIDEO_BITRATE_PEAK:
+5 −1
Original line number Diff line number Diff line
@@ -1911,6 +1911,7 @@ void *msm_vidc_open(int core_id, int session_type)

	INIT_MSM_VIDC_LIST(&inst->pendingq);
	INIT_MSM_VIDC_LIST(&inst->scratchbufs);
	INIT_MSM_VIDC_LIST(&inst->freqs);
	INIT_MSM_VIDC_LIST(&inst->persistbufs);
	INIT_MSM_VIDC_LIST(&inst->pending_getpropq);
	INIT_MSM_VIDC_LIST(&inst->outputbufs);
@@ -1921,8 +1922,9 @@ void *msm_vidc_open(int core_id, int session_type)
	inst->session_type = session_type;
	inst->state = MSM_VIDC_CORE_UNINIT_DONE;
	inst->core = core;
	inst->freq = 0;
	inst->bit_depth = MSM_VIDC_BIT_DEPTH_8;
	inst->instant_bitrate = 0;
	inst->bitrate = 0;
	inst->pic_struct = MSM_VIDC_PIC_STRUCT_PROGRESSIVE;
	inst->colour_space = MSM_VIDC_BT601_6_525;
	inst->profile = V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE;
@@ -2037,6 +2039,8 @@ static void cleanup_instance(struct msm_vidc_inst *inst)
		}
		mutex_unlock(&inst->pendingq.lock);

		msm_comm_free_freq_table(inst);

		if (msm_comm_release_scratch_buffers(inst, false)) {
			dprintk(VIDC_ERR,
				"Failed to release scratch buffers\n");
+346 −12
Original line number Diff line number Diff line
@@ -23,15 +23,153 @@ static bool msm_dcvs_check_supported(struct msm_vidc_inst *inst);
static int msm_dcvs_enc_scale_clocks(struct msm_vidc_inst *inst);
static int msm_dcvs_dec_scale_clocks(struct msm_vidc_inst *inst, bool fbd);

int msm_dcvs_try_enable(struct msm_vidc_inst *inst)
/*
 * msm_comm_vote_bus() - collect per-instance session data and pass it to the
 * HFI vote_bus operation.
 * @core: core whose instance list is walked
 *
 * Under core->lock, counts the instances on core->instances, allocates one
 * vidc_bus_vote_data per instance and fills in domain, codec, width, height
 * and fps. The vote_bus op is invoked after the lock is dropped.
 *
 * Return: -EINVAL when core or core->device is NULL, -ENOMEM when the vote
 * array cannot be allocated, otherwise the value returned by the vote_bus op.
 *
 * NOTE(review): in this diff rendering a few lines of the removed
 * msm_dcvs_try_enable() body appear interleaved below (the "!inst" check and
 * the dcvs_mode assignment followed by "return 0;") - confirm against the
 * real file before relying on this listing.
 */
int msm_comm_vote_bus(struct msm_vidc_core *core)
{
	if (!inst) {
		dprintk(VIDC_ERR, "%s: Invalid args: %p\n", __func__, inst);
	int rc = 0, vote_data_count = 0, i = 0;
	struct hfi_device *hdev;
	struct msm_vidc_inst *inst = NULL;
	struct vidc_bus_vote_data *vote_data = NULL;

	if (!core) {
		dprintk(VIDC_ERR, "%s Invalid args: %pK\n", __func__, core);
		return -EINVAL;
	}
	inst->dcvs_mode = msm_dcvs_check_supported(inst);
	return 0;

	hdev = core->device;
	if (!hdev) {
		dprintk(VIDC_ERR, "%s Invalid device handle: %pK\n",
				__func__, hdev);
		return -EINVAL;
	}

	/* First pass: count instances so the vote array can be sized. */
	mutex_lock(&core->lock);
	list_for_each_entry(inst, &core->instances, list)
		++vote_data_count;

	vote_data = kcalloc(vote_data_count, sizeof(*vote_data),
			GFP_TEMPORARY);
	if (!vote_data) {
		dprintk(VIDC_ERR, "%s: failed to allocate memory\n", __func__);
		rc = -ENOMEM;
		goto fail_alloc;
	}

	/* Second pass (lock still held): fill one vote entry per instance. */
	list_for_each_entry(inst, &core->instances, list) {
		int codec = 0, yuv = 0;

		/* Decoders take the codec from OUTPUT_PORT, others from CAPTURE_PORT. */
		codec = inst->session_type == MSM_VIDC_DECODER ?
			inst->fmts[OUTPUT_PORT].fourcc :
			inst->fmts[CAPTURE_PORT].fourcc;

		/*
		 * The uncompressed format comes from the opposite port; it is
		 * only referenced by the commented-out color_formats line below.
		 */
		yuv = inst->session_type == MSM_VIDC_DECODER ?
			inst->fmts[CAPTURE_PORT].fourcc :
			inst->fmts[OUTPUT_PORT].fourcc;

		vote_data[i].domain = get_hal_domain(inst->session_type);
		vote_data[i].codec = get_hal_codec(codec);
		/* Use the larger of the two ports' dimensions. */
		vote_data[i].width =  max(inst->prop.width[CAPTURE_PORT],
				inst->prop.width[OUTPUT_PORT]);
		vote_data[i].height = max(inst->prop.height[CAPTURE_PORT],
				inst->prop.height[OUTPUT_PORT]);

		/*
		 * A non-zero operating_rate overrides prop.fps: its value
		 * shifted right by 16 is used, with a floor of 1.
		 * NOTE(review): presumably operating_rate is Q16 fixed point -
		 * confirm.
		 */
		if (inst->operating_rate)
			vote_data[i].fps = (inst->operating_rate >> 16) ?
				inst->operating_rate >> 16 : 1;
		else
			vote_data[i].fps = inst->prop.fps;

		/*
		 * TODO: support for OBP-DBP split mode hasn't been yet
		 * implemented, once it is, this part of code needs to be
		 * revisited since passing in accurate information to the bus
		 * governor will drastically reduce bandwidth
		 */
		//vote_data[i].color_formats[0] = get_hal_uncompressed(yuv);
		vote_data[i].num_formats = 1;
		i++;
	}
	mutex_unlock(&core->lock);

	/* The HFI op is called without core->lock held. */
	rc = call_hfi_op(hdev, vote_bus, hdev->hfi_device_data, vote_data,
			vote_data_count);
	if (rc)
		dprintk(VIDC_ERR, "Failed to scale bus: %d\n", rc);

	kfree(vote_data);
	return rc;

fail_alloc:
	/* Allocation failed while the lock was held; release it before returning. */
	mutex_unlock(&core->lock);
	return rc;
}

/*
 * msm_vidc_update_freq_entry() - store the frequency computed for a buffer.
 * @inst: instance whose freqs list is updated
 * @freq: frequency value to record
 * @device_addr: buffer address used as the lookup key
 *
 * Overwrites the frequency of the entry whose device_addr matches, or appends
 * a new entry when none matches. All list access happens under
 * inst->freqs.lock. If a new entry cannot be allocated the failure is logged
 * and the list is left unchanged.
 */
static void msm_vidc_update_freq_entry(struct msm_vidc_inst *inst,
	unsigned long freq, ion_phys_addr_t device_addr)
{
	struct vidc_freq_data *temp;
	bool found = false;

	mutex_lock(&inst->freqs.lock);
	/* Nothing is removed while walking, so the plain iterator suffices. */
	list_for_each_entry(temp, &inst->freqs.list, list) {
		if (temp->device_addr == device_addr) {
			temp->freq = freq;
			found = true;
			break;
		}
	}

	if (!found) {
		temp = kzalloc(sizeof(*temp), GFP_KERNEL);
		if (!temp) {
			/* kzalloc() may fail; never dereference a NULL entry. */
			dprintk(VIDC_ERR, "%s: failed to allocate memory\n",
				__func__);
			goto unlock;
		}
		temp->freq = freq;
		temp->device_addr = device_addr;
		list_add_tail(&temp->list, &inst->freqs.list);
	}
unlock:
	mutex_unlock(&inst->freqs.lock);
}

// TODO this needs to be removed later and use queued_list

/*
 * msm_vidc_clear_freq_entry() - zero the recorded frequency of a buffer.
 * @inst: instance whose freqs list is searched
 * @device_addr: buffer address identifying the entries to clear
 *
 * Every entry whose device_addr matches has its freq reset to 0; the entry
 * itself stays on the list. The walk runs under inst->freqs.lock.
 */
void msm_vidc_clear_freq_entry(struct msm_vidc_inst *inst,
	ion_phys_addr_t device_addr)
{
	struct vidc_freq_data *entry, *tmp;

	mutex_lock(&inst->freqs.lock);
	list_for_each_entry_safe(entry, tmp, &inst->freqs.list, list) {
		if (entry->device_addr != device_addr)
			continue;

		entry->freq = 0;
	}
	mutex_unlock(&inst->freqs.lock);
}


static unsigned long msm_vidc_get_highest_freq(struct msm_vidc_inst *inst)
{
	struct vidc_freq_data *temp;
	unsigned long freq = 0;

	mutex_lock(&inst->freqs.lock);
	list_for_each_entry(temp, &inst->freqs.list, list) {
		freq = max(freq, temp->freq);
	}
	mutex_unlock(&inst->freqs.lock);

	return freq;
}

/*
 * msm_comm_free_freq_table() - release every entry on an instance's freqs list.
 * @inst: instance whose freqs list is emptied
 *
 * Each entry is unlinked and freed under inst->freqs.lock, after which the
 * list head is re-initialised.
 */
void msm_comm_free_freq_table(struct msm_vidc_inst *inst)
{
	struct vidc_freq_data *entry, *tmp;

	mutex_lock(&inst->freqs.lock);

	/* The _safe iterator is required because entries are freed in the loop. */
	list_for_each_entry_safe(entry, tmp, &inst->freqs.list, list) {
		list_del(&entry->list);
		kfree(entry);
	}

	INIT_LIST_HEAD(&inst->freqs.list);

	mutex_unlock(&inst->freqs.lock);
}


static inline int msm_dcvs_get_mbs_per_frame(struct msm_vidc_inst *inst)
{
@@ -50,6 +188,168 @@ static inline int msm_dcvs_get_mbs_per_frame(struct msm_vidc_inst *inst)
	return NUM_MBS_PER_FRAME(height, width);
}

/*
 * msm_vidc_calc_freq() - estimate the cycle demand of one instance.
 * @inst: instance to evaluate; inst->entry supplies the per-macroblock costs
 * @filled_len: bytes used in the input buffer (only read for decoders)
 *
 * VPP and VSP cycle counts are derived from the macroblocks per frame and
 * the per-macroblock costs in inst->entry. The VSP count additionally gets a
 * bitrate-dependent term scaled by 10 / 7: the configured bitrate for
 * encoders, and fps * filled_len * 8 for decoders.
 *
 * Return: the larger of the VPP and VSP counts, or 0 for a session type that
 * is neither encoder nor decoder.
 */
static unsigned long msm_vidc_calc_freq(struct msm_vidc_inst *inst,
	u32 filled_len)
{
	unsigned long vpp_cycles = 0, vsp_cycles = 0;
	u32 vpp_cycles_per_mb;
	u32 mbs_per_frame;

	mbs_per_frame = msm_dcvs_get_mbs_per_frame(inst);

	/*
	 * Calculate vpp, vsp cycles separately for encoder and decoder.
	 * Even though, most part is common now, in future it may change
	 * between them.
	 */

	if (inst->session_type == MSM_VIDC_ENCODER) {
		/* Low power mode uses its own per-macroblock cost. */
		vpp_cycles_per_mb = inst->flags & VIDC_LOW_POWER ?
			inst->entry->low_power_cycles :
			inst->entry->vpp_cycles;

		/*
		 * The per-macroblock cost must actually feed vpp_cycles;
		 * otherwise the encoder VPP load is always treated as 0.
		 */
		vpp_cycles = mbs_per_frame * vpp_cycles_per_mb;

		vsp_cycles = mbs_per_frame * inst->entry->vsp_cycles;

		/* 10 / 7 is overhead factor */
		vsp_cycles += (inst->bitrate * 10) / 7;
	} else if (inst->session_type == MSM_VIDC_DECODER) {
		vpp_cycles = mbs_per_frame * inst->entry->vpp_cycles;

		vsp_cycles = mbs_per_frame * inst->entry->vsp_cycles;
		/* 10 / 7 is overhead factor */
		vsp_cycles += (inst->prop.fps * filled_len * 8 * 10) / 7;

	} else {
		// TODO return Min or Max ?
		dprintk(VIDC_ERR, "%s: Unknown session type = %d\n",
			__func__, inst->session_type);
		return 0;
	}

	return max(vpp_cycles, vsp_cycles);
}

static int msm_vidc_set_clocks(struct msm_vidc_core *core)
{
	struct hfi_device *hdev;
	unsigned long freq = 0, rate = 0;
	struct msm_vidc_inst *temp = NULL;
	int rc = 0, i = 0;
	struct allowed_clock_rates_table *allowed_clks_tbl = NULL;

	hdev = core->device;
	allowed_clks_tbl = core->resources.allowed_clks_tbl;
	if (!hdev || !allowed_clks_tbl) {
		dprintk(VIDC_ERR,
			"%s Invalid parameters\n", __func__);
		return -EINVAL;
	}

	mutex_lock(&core->lock);
	list_for_each_entry(temp, &core->instances, list) {
		freq += temp->freq;
	}
	for (i = core->resources.allowed_clks_tbl_size - 1; i >= 0; i--) {
		rate = allowed_clks_tbl[i].clock_rate;
		if (rate >= freq)
			break;
	}
	mutex_unlock(&core->lock);

	core->freq = rate;
	dprintk(VIDC_PROF, "Voting for freq = %lu", freq);
	rc = call_hfi_op(hdev, scale_clocks,
			hdev->hfi_device_data, rate);

	return rc;
}

/*
 * msm_vidc_max_freq() - rate stored in the first allowed-clock-rates entry.
 * @inst: instance whose core supplies the table
 *
 * Return: allowed_clks_tbl[0].clock_rate, which the code logs as the maximum
 * rate.
 */
static unsigned long msm_vidc_max_freq(struct msm_vidc_inst *inst)
{
	struct allowed_clock_rates_table *tbl =
		inst->core->resources.allowed_clks_tbl;
	unsigned long max_rate = tbl[0].clock_rate;

	dprintk(VIDC_PROF, "Max rate = %lu", max_rate);

	return max_rate;
}

/*
 * msm_comm_scale_clocks() - recompute an instance's frequency and rescale
 * the core clocks.
 * @inst: instance to evaluate
 *
 * While fewer than DCVS_FTB_WINDOW fill-buffer-done events have been counted,
 * the maximum rate is requested. Afterwards the pending queue is scanned for
 * output-mplane buffers: the largest bytesused and the address of the last
 * such buffer feed msm_vidc_calc_freq(), the result is recorded per buffer,
 * and the highest recorded frequency becomes the instance frequency. When no
 * such buffer is found the previous inst->freq is kept.
 *
 * Return: always 0; the result of msm_vidc_set_clocks() is not propagated.
 */
int msm_comm_scale_clocks(struct msm_vidc_inst *inst)
{
	struct vb2_buf_entry *buf, *tmp;
	unsigned long target = 0;
	u32 filled_len = 0;
	ion_phys_addr_t device_addr = 0;

	if (inst->count.fbd < DCVS_FTB_WINDOW) {
		target = msm_vidc_max_freq(inst);
	} else {
		mutex_lock(&inst->pendingq.lock);
		list_for_each_entry_safe(buf, tmp, &inst->pendingq.list, list) {
			if (buf->vb->type != V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
				continue;

			filled_len = max(filled_len,
				buf->vb->planes[0].bytesused);
			device_addr = buf->vb->planes[0].m.userptr;
		}
		mutex_unlock(&inst->pendingq.lock);

		if (filled_len && device_addr) {
			target = msm_vidc_calc_freq(inst, filled_len);
			msm_vidc_update_freq_entry(inst, target, device_addr);
			target = msm_vidc_get_highest_freq(inst);
		} else {
			/* Nothing usable queued: keep the current frequency. */
			target = inst->freq;
		}
	}

	inst->freq = target;
	msm_vidc_set_clocks(inst->core);
	return 0;
}

/*
 * msm_comm_scale_clocks_and_bus() - rescale clocks, then vote for bus bandwidth.
 * @inst: instance that triggered the rescale
 *
 * Failures of either step are only logged as warnings; the second step is
 * attempted regardless of the first.
 *
 * Return: -EINVAL when inst, inst->core or inst->core->device is NULL,
 * otherwise 0.
 */
int msm_comm_scale_clocks_and_bus(struct msm_vidc_inst *inst)
{
	struct msm_vidc_core *core;

	if (!inst || !inst->core || !inst->core->device) {
		dprintk(VIDC_ERR, "%s Invalid params\n", __func__);
		return -EINVAL;
	}
	core = inst->core;

	if (msm_comm_scale_clocks(inst)) {
		dprintk(VIDC_WARN,
			"Failed to scale clocks. Performance might be impacted\n");
	}
	if (msm_comm_vote_bus(core)) {
		dprintk(VIDC_WARN,
			"Failed to scale DDR bus. Performance might be impacted\n");
	}
	return 0;
}

/*
 * msm_dcvs_try_enable() - refresh an instance's DCVS mode.
 * @inst: instance to update
 *
 * Stores the result of msm_dcvs_check_supported() in inst->dcvs_mode.
 *
 * Return: 0 on success, -EINVAL when inst is NULL.
 */
int msm_dcvs_try_enable(struct msm_vidc_inst *inst)
{
	if (inst) {
		inst->dcvs_mode = msm_dcvs_check_supported(inst);
		return 0;
	}

	dprintk(VIDC_ERR, "%s: Invalid args: %p\n", __func__, inst);
	return -EINVAL;
}

static inline int msm_dcvs_count_active_instances(struct msm_vidc_core *core,
	enum session_type session_type)
{
@@ -104,6 +404,44 @@ static bool msm_dcvs_check_codec_supported(int fourcc,
	return codec_type && session_type;
}

/*
 * msm_comm_init_clocks_and_bus_data() - bind an instance to its clock profile.
 * @inst: instance to configure
 *
 * Walks the core's clock_freq_tbl and stores in inst->entry the first profile
 * whose codec_mask is accepted by msm_dcvs_check_codec_supported() for the
 * instance's codec and session type. Decoders take the codec fourcc from
 * OUTPUT_PORT, all other sessions from CAPTURE_PORT.
 *
 * Return: 0 when a profile matched, -EINVAL when none did.
 */
int msm_comm_init_clocks_and_bus_data(struct msm_vidc_inst *inst)
{
	struct clock_freq_table *tbl = &inst->core->resources.clock_freq_tbl;
	struct clock_profile_entry *candidate = NULL;
	int fourcc;
	int idx;

	if (inst->session_type == MSM_VIDC_DECODER)
		fourcc = inst->fmts[OUTPUT_PORT].fourcc;
	else
		fourcc = inst->fmts[CAPTURE_PORT].fourcc;

	for (idx = 0; idx < tbl->count; idx++) {
		candidate = &tbl->clk_prof_entries[idx];

		if (!msm_dcvs_check_codec_supported(fourcc,
				candidate->codec_mask,
				inst->session_type))
			continue;

		/* First matching profile wins. */
		inst->entry = candidate;
		return 0;
	}

	dprintk(VIDC_ERR,
		"Failed : No matching clock entry found\n");
	return -EINVAL;
}

static void msm_dcvs_update_dcvs_params(int idx, struct msm_vidc_inst *inst)
{
	struct dcvs_stats *dcvs = NULL;
@@ -439,8 +777,6 @@ static int msm_dcvs_enc_scale_clocks(struct msm_vidc_inst *inst)
			dcvs->prev_freq_lowered ? "Lower" : "Higher",
			dcvs->load, total_input_buf, fw_pending_bufs);

		rc = msm_comm_scale_clocks_load(core, dcvs->load,
				LOAD_CALC_NO_QUIRKS);
		if (rc) {
			dprintk(VIDC_PROF,
				"Failed to set clock rate in FBD: %d\n", rc);
@@ -519,8 +855,6 @@ static int msm_dcvs_dec_scale_clocks(struct msm_vidc_inst *inst, bool fbd)
			dcvs->load, total_output_buf, buffers_outside_fw,
			dcvs->threshold_disp_buf_high, dcvs->transition_turbo);

		rc = msm_comm_scale_clocks_load(core, dcvs->load,
				LOAD_CALC_NO_QUIRKS);
		if (rc) {
			dprintk(VIDC_ERR,
				"Failed to set clock rate in FBD: %d\n", rc);
@@ -624,7 +958,7 @@ static bool msm_dcvs_check_supported(struct msm_vidc_inst *inst)
	}
dcvs_decision_done:
	if (!is_dcvs_supported) {
		msm_comm_scale_clocks(core);
		msm_comm_scale_clocks(inst);
		if (instance_count > 1) {
			mutex_lock(&core->lock);
			list_for_each_entry(temp, &core->instances, list)
Loading