Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 22c775ce authored by Alex Deucher's avatar Alex Deucher
Browse files

drm/radeon: implement clock and power gating for CIK (v3)



Only the APUs support power gating.

v2: disable cgcg for now
v3: workaround hw issue in mgcg

Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 1fd11777
Loading
Loading
Loading
Loading
+1454 −21

File changed.

Preview size limit exceeded, changes collapsed.

+94 −1
Original line number Diff line number Diff line
@@ -299,6 +299,10 @@
#define	VM_CONTEXT0_PAGE_TABLE_END_ADDR			0x157C
#define	VM_CONTEXT1_PAGE_TABLE_END_ADDR			0x1580

#define VM_L2_CG           				0x15c0
#define		MC_CG_ENABLE				(1 << 18)
#define		MC_LS_ENABLE				(1 << 19)

#define MC_SHARED_CHMAP						0x2004
#define		NOOFCHAN_SHIFT					12
#define		NOOFCHAN_MASK					0x0000f000
@@ -328,6 +332,17 @@

#define MC_SHARED_BLACKOUT_CNTL           		0x20ac

#define MC_HUB_MISC_HUB_CG           			0x20b8
#define MC_HUB_MISC_VM_CG           			0x20bc

#define MC_HUB_MISC_SIP_CG           			0x20c0

#define MC_XPB_CLK_GAT           			0x2478

#define MC_CITF_MISC_RD_CG           			0x2648
#define MC_CITF_MISC_WR_CG           			0x264c
#define MC_CITF_MISC_VM_CG           			0x2650

#define	MC_ARB_RAMCFG					0x2760
#define		NOOFBANK_SHIFT					0
#define		NOOFBANK_MASK					0x00000003
@@ -357,6 +372,7 @@
#define MC_SEQ_IO_DEBUG_DATA           			0x2a48

#define	HDP_HOST_PATH_CNTL				0x2C00
#define 	CLOCK_GATING_DIS			(1 << 23)
#define	HDP_NONSURFACE_BASE				0x2C04
#define	HDP_NONSURFACE_INFO				0x2C08
#define	HDP_NONSURFACE_SIZE				0x2C0C
@@ -364,6 +380,10 @@
#define HDP_ADDR_CONFIG  				0x2F48
#define HDP_MISC_CNTL					0x2F4C
#define 	HDP_FLUSH_INVALIDATE_CACHE			(1 << 0)
#define HDP_MEM_POWER_LS				0x2F50
#define 	HDP_LS_ENABLE				(1 << 0)

#define ATC_MISC_CG           				0x3350

#define IH_RB_CNTL                                        0x3e00
#       define IH_RB_ENABLE                               (1 << 0)
@@ -631,6 +651,9 @@

#define	CP_RB0_RPTR					0x8700
#define	CP_RB_WPTR_DELAY				0x8704
#define	CP_RB_WPTR_POLL_CNTL				0x8708
#define		IDLE_POLL_COUNT(x)			((x) << 16)
#define		IDLE_POLL_COUNT_MASK			(0xffff << 16)

#define CP_MEQ_THRESHOLDS				0x8764
#define		MEQ1_START(x)				((x) << 0)
@@ -857,6 +880,9 @@
#       define CP_RINGID1_INT_STAT                      (1 << 30)
#       define CP_RINGID0_INT_STAT                      (1 << 31)

#define CP_MEM_SLP_CNTL                                 0xC1E4
#       define CP_MEM_LS_EN                             (1 << 0)

#define CP_CPF_DEBUG                                    0xC200

#define CP_PQ_WPTR_POLL_CNTL                            0xC20C
@@ -902,6 +928,9 @@

#define RLC_MC_CNTL                                       0xC30C

#define RLC_MEM_SLP_CNTL                                  0xC318
#       define RLC_MEM_LS_EN                              (1 << 0)

#define RLC_LB_CNTR_MAX                                   0xC348

#define RLC_LB_CNTL                                       0xC364
@@ -910,7 +939,9 @@
#define RLC_LB_CNTR_INIT                                  0xC36C

#define RLC_SAVE_AND_RESTORE_BASE                         0xC374
#define RLC_DRIVER_DMA_STATUS                             0xC378
#define RLC_DRIVER_DMA_STATUS                             0xC378 /* dGPU */
#define RLC_CP_TABLE_RESTORE                              0xC378 /* APU */
#define RLC_PG_DELAY_2                                    0xC37C

#define RLC_GPM_UCODE_ADDR                                0xC388
#define RLC_GPM_UCODE_DATA                                0xC38C
@@ -919,12 +950,50 @@
#define RLC_CAPTURE_GPU_CLOCK_COUNT                       0xC398
#define RLC_UCODE_CNTL                                    0xC39C

#define RLC_GPM_STAT                                      0xC400
#       define RLC_GPM_BUSY                               (1 << 0)

#define RLC_PG_CNTL                                       0xC40C
#       define GFX_PG_ENABLE                              (1 << 0)
#       define GFX_PG_SRC                                 (1 << 1)
#       define DYN_PER_CU_PG_ENABLE                       (1 << 2)
#       define STATIC_PER_CU_PG_ENABLE                    (1 << 3)
#       define DISABLE_GDS_PG                             (1 << 13)
#       define DISABLE_CP_PG                              (1 << 15)
#       define SMU_CLK_SLOWDOWN_ON_PU_ENABLE              (1 << 17)
#       define SMU_CLK_SLOWDOWN_ON_PD_ENABLE              (1 << 18)

#define RLC_CGTT_MGCG_OVERRIDE                            0xC420
#define RLC_CGCG_CGLS_CTRL                                0xC424
#       define CGCG_EN                                    (1 << 0)
#       define CGLS_EN                                    (1 << 1)

#define RLC_PG_DELAY                                      0xC434

#define RLC_LB_INIT_CU_MASK                               0xC43C

#define RLC_LB_PARAMS                                     0xC444

#define RLC_PG_AO_CU_MASK                                 0xC44C

#define	RLC_MAX_PG_CU					0xC450
#	define MAX_PU_CU(x)				((x) << 0)
#	define MAX_PU_CU_MASK				(0xff << 0)
#define RLC_AUTO_PG_CTRL                                  0xC454
#       define AUTO_PG_EN                                 (1 << 0)
#	define GRBM_REG_SGIT(x)				((x) << 3)
#	define GRBM_REG_SGIT_MASK			(0xffff << 3)

#define RLC_SERDES_WR_CU_MASTER_MASK                      0xC474
#define RLC_SERDES_WR_NONCU_MASTER_MASK                   0xC478
#define RLC_SERDES_WR_CTRL                                0xC47C
#define		BPM_ADDR(x)				((x) << 0)
#define		BPM_ADDR_MASK      			(0xff << 0)
#define		CGLS_ENABLE				(1 << 16)
#define		CGCG_OVERRIDE_0				(1 << 20)
#define		MGCG_OVERRIDE_0				(1 << 22)
#define		MGCG_OVERRIDE_1				(1 << 23)

#define RLC_SERDES_CU_MASTER_BUSY                         0xC484
#define RLC_SERDES_NONCU_MASTER_BUSY                      0xC488
#       define SE_MASTER_BUSY_MASK                        0x0000ffff
@@ -979,6 +1048,8 @@
#define		MQD_VMID(x)				((x) << 0)
#define		MQD_VMID_MASK      			(0xf << 0)

#define DB_RENDER_CONTROL                               0x28000

#define PA_SC_RASTER_CONFIG                             0x28350
#       define RASTER_CONFIG_RB_MAP_0                   0
#       define RASTER_CONFIG_RB_MAP_1                   1
@@ -1072,6 +1143,16 @@

#define	CP_PERFMON_CNTL					0x36020

#define	CGTS_SM_CTRL_REG				0x3c000
#define		SM_MODE(x)				((x) << 17)
#define		SM_MODE_MASK				(0x7 << 17)
#define		SM_MODE_ENABLE				(1 << 20)
#define		CGTS_OVERRIDE				(1 << 21)
#define		CGTS_LS_OVERRIDE			(1 << 22)
#define		ON_MONITOR_ADD_EN			(1 << 23)
#define		ON_MONITOR_ADD(x)			((x) << 24)
#define		ON_MONITOR_ADD_MASK			(0xff << 24)

#define	CGTS_TCC_DISABLE				0x3c00c
#define	CGTS_USER_TCC_DISABLE				0x3c010
#define		TCC_DISABLE_MASK				0xFFFF0000
@@ -1304,6 +1385,8 @@

#define	SDMA0_UCODE_ADDR                                  0xD000
#define	SDMA0_UCODE_DATA                                  0xD004
#define	SDMA0_POWER_CNTL                                  0xD008
#define	SDMA0_CLK_CTRL                                    0xD00C

#define SDMA0_CNTL                                        0xD010
#       define TRAP_ENABLE                                (1 << 0)
@@ -1428,6 +1511,13 @@
#define UVD_RBC_RB_RPTR			0xf690
#define UVD_RBC_RB_WPTR			0xf694

#define	UVD_CGC_CTRL					0xF4B0
#	define DCM					(1 << 0)
#	define CG_DT(x)					((x) << 2)
#	define CG_DT_MASK				(0xf << 2)
#	define CLK_OD(x)				((x) << 6)
#	define CLK_OD_MASK				(0x1f << 6)

/* UVD clocks */

#define CG_DCLK_CNTL			0xC050009C
@@ -1438,4 +1528,7 @@
#define CG_VCLK_CNTL			0xC05000A4
#define CG_VCLK_STATUS			0xC05000A8

/* UVD CTX indirect */
#define	UVD_CGC_MEM_CTRL				0xC0

#endif
+944 −0

File added.

Preview size limit exceeded, changes collapsed.

+56 −1
Original line number Diff line number Diff line
@@ -140,6 +140,7 @@ extern void cayman_cp_int_cntl_setup(struct radeon_device *rdev,
				     int ring, u32 cp_int_cntl);
extern void cayman_vm_decode_fault(struct radeon_device *rdev,
				   u32 status, u32 addr);
void cik_init_cp_pg_table(struct radeon_device *rdev);

static const u32 evergreen_golden_registers[] =
{
@@ -3893,7 +3894,21 @@ void sumo_rlc_fini(struct radeon_device *rdev)
		radeon_bo_unref(&rdev->rlc.clear_state_obj);
		rdev->rlc.clear_state_obj = NULL;
	}

	/* clear state block */
	if (rdev->rlc.cp_table_obj) {
		r = radeon_bo_reserve(rdev->rlc.cp_table_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve RLC cp table bo failed\n", r);
		radeon_bo_unpin(rdev->rlc.cp_table_obj);
		radeon_bo_unreserve(rdev->rlc.cp_table_obj);

		radeon_bo_unref(&rdev->rlc.cp_table_obj);
		rdev->rlc.cp_table_obj = NULL;
	}
}

#define CP_ME_TABLE_SIZE    96

int sumo_rlc_init(struct radeon_device *rdev)
{
@@ -3980,9 +3995,10 @@ int sumo_rlc_init(struct radeon_device *rdev)
		}
		reg_list_blk_index = (3 * reg_list_num + 2);
		dws += reg_list_blk_index;
		rdev->rlc.clear_state_size = dws;

		if (rdev->rlc.clear_state_obj == NULL) {
			r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
			r = radeon_bo_create(rdev, rdev->rlc.clear_state_size * 4, PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
@@ -4046,6 +4062,45 @@ int sumo_rlc_init(struct radeon_device *rdev)
		radeon_bo_unreserve(rdev->rlc.clear_state_obj);
	}

	if (rdev->rlc.cp_table_size) {
		if (rdev->rlc.cp_table_obj == NULL) {
			r = radeon_bo_create(rdev, rdev->rlc.cp_table_size, PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.cp_table_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create RLC cp table bo failed\n", r);
				sumo_rlc_fini(rdev);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(rdev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			sumo_rlc_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->rlc.cp_table_obj, RADEON_GEM_DOMAIN_VRAM,
				  &rdev->rlc.cp_table_gpu_addr);
		if (r) {
			radeon_bo_unreserve(rdev->rlc.cp_table_obj);
			dev_warn(rdev->dev, "(%d) pin RLC cp_table bo failed\n", r);
			sumo_rlc_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->rlc.cp_table_obj, (void **)&rdev->rlc.cp_table_ptr);
		if (r) {
			dev_warn(rdev->dev, "(%d) map RLC cp table bo failed\n", r);
			sumo_rlc_fini(rdev);
			return r;
		}

		cik_init_cp_pg_table(rdev);

		radeon_bo_kunmap(rdev->rlc.cp_table_obj);
		radeon_bo_unreserve(rdev->rlc.cp_table_obj);

	}

	return 0;
}

+14 −0
Original line number Diff line number Diff line
@@ -152,6 +152,14 @@ extern int radeon_aspm;
#define RADEON_RESET_MC				(1 << 10)
#define RADEON_RESET_DISPLAY			(1 << 11)

/* CG block flags */
#define RADEON_CG_BLOCK_GFX			(1 << 0)
#define RADEON_CG_BLOCK_MC			(1 << 1)
#define RADEON_CG_BLOCK_SDMA			(1 << 2)
#define RADEON_CG_BLOCK_UVD			(1 << 3)
#define RADEON_CG_BLOCK_VCE			(1 << 4)
#define RADEON_CG_BLOCK_HDP			(1 << 5)

/* max cursor sizes (in pixels) */
#define CURSOR_WIDTH 64
#define CURSOR_HEIGHT 64
@@ -861,6 +869,12 @@ struct radeon_rlc {
	uint64_t		clear_state_gpu_addr;
	volatile uint32_t	*cs_ptr;
	const struct cs_section_def   *cs_data;
	u32                     clear_state_size;
	/* for cp tables */
	struct radeon_bo	*cp_table_obj;
	uint64_t		cp_table_gpu_addr;
	volatile uint32_t	*cp_table_ptr;
	u32                     cp_table_size;
};

int radeon_ib_get(struct radeon_device *rdev, int ring,
Loading