Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit cb92d452 authored by Alex Deucher, committed by Dave Airlie
Browse files

drm/radeon/kms: add blit support for cayman (v2)



Allows us to use the 3D engine for memory management
and allows us to use vram beyond the BAR aperture.

v2: fix copy paste typo
Reported-by: Nils Wallménius <nils.wallmenius@gmail.com>

Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
parent ac10f81d
Loading
Loading
Loading
Loading
+322 −4
Original line number Diff line number Diff line
@@ -39,17 +39,335 @@

/*
 * Default register state for the Cayman blit path.
 *
 * The table is a sequence of packets.  Each packet is laid out as:
 *
 *   header   0xc0NN6900, where NN is the number of register values
 *   offset   dword offset of the first register being written
 *   values   NN consecutive register values
 *
 * (The header encoding looks like a PM4 type-3 SET_CONTEXT_REG packet;
 * confirm against the PACKET3 definitions in evergreend.h.)
 *
 * The header count MUST match the number of values that follow it,
 * otherwise every later packet in the stream is misparsed.  Keep the
 * packets in ascending register-offset order when adding new state.
 */
const u32 cayman_default_state[] =
{
	0xc0066900,
	0x00000000,
	0x00000060, /* DB_RENDER_CONTROL */
	0x00000000, /* DB_COUNT_CONTROL */
	0x00000000, /* DB_DEPTH_VIEW */
	0x0000002a, /* DB_RENDER_OVERRIDE */
	0x00000000, /* DB_RENDER_OVERRIDE2 */
	0x00000000, /* DB_HTILE_DATA_BASE */

	0xc0026900,
	0x0000000a,
	0x00000000, /* DB_STENCIL_CLEAR */
	0x00000000, /* DB_DEPTH_CLEAR */

	0xc0036900,
	0x0000000f,
	0x00000000, /* DB_DEPTH_INFO */
	0x00000000, /* DB_Z_INFO */
	0x00000000, /* DB_STENCIL_INFO */

	0xc0016900,
	0x00000080,
	0x00000000, /* PA_SC_WINDOW_OFFSET */

	0xc00d6900,
	0x00000083,
	0x0000ffff, /* PA_SC_CLIPRECT_RULE */
	0x00000000, /* PA_SC_CLIPRECT_0_TL */
	0x20002000, /* PA_SC_CLIPRECT_0_BR */
	0x00000000,
	0x20002000,
	0x00000000,
	0x20002000,
	0x00000000,
	0x20002000,
	0xaaaaaaaa, /* PA_SC_EDGERULE */
	0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */
	0x0000000f, /* CB_TARGET_MASK */
	0x0000000f, /* CB_SHADER_MASK */

	0xc0226900,
	0x00000094,
	0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */
	0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */
	0x80000000,
	0x20002000,
	0x80000000,
	0x20002000,
	0x80000000,
	0x20002000,
	0x80000000,
	0x20002000,
	0x80000000,
	0x20002000,
	0x80000000,
	0x20002000,
	0x80000000,
	0x20002000,
	0x80000000,
	0x20002000,
	0x80000000,
	0x20002000,
	0x80000000,
	0x20002000,
	0x80000000,
	0x20002000,
	0x80000000,
	0x20002000,
	0x80000000,
	0x20002000,
	0x80000000,
	0x20002000,
	0x80000000,
	0x20002000,
	0x00000000, /* PA_SC_VPORT_ZMIN_0 */
	0x3f800000, /* PA_SC_VPORT_ZMAX_0 */

	0xc0016900,
	0x000000d4,
	0x00000000, /* SX_MISC */

	0xc0026900,
	0x000000d9,
	0x00000000, /* CP_RINGID */
	0x00000000, /* CP_VMID */

	0xc0096900,
	0x00000100,
	0x00ffffff, /* VGT_MAX_VTX_INDX */
	0x00000000, /* VGT_MIN_VTX_INDX */
	0x00000000, /* VGT_INDX_OFFSET */
	0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */
	0x00000000, /* SX_ALPHA_TEST_CONTROL */
	0x00000000, /* CB_BLEND_RED */
	0x00000000, /* CB_BLEND_GREEN */
	0x00000000, /* CB_BLEND_BLUE */
	0x00000000, /* CB_BLEND_ALPHA */

	0xc0016900,
	0x00000187,
	0x00000100, /* SPI_VS_OUT_ID_0 */

	0xc0026900,
	0x00000191,
	0x00000100, /* SPI_PS_INPUT_CNTL_0 */
	0x00000101, /* SPI_PS_INPUT_CNTL_1 */

	0xc0016900,
	0x000001b1,
	0x00000000, /* SPI_VS_OUT_CONFIG */

	0xc0106900,
	0x000001b3,
	0x20000001, /* SPI_PS_IN_CONTROL_0 */
	0x00000000, /* SPI_PS_IN_CONTROL_1 */
	0x00000000, /* SPI_INTERP_CONTROL_0 */
	0x00000000, /* SPI_INPUT_Z */
	0x00000000, /* SPI_FOG_CNTL */
	0x00100000, /* SPI_BARYC_CNTL */
	0x00000000, /* SPI_PS_IN_CONTROL_2 */
	0x00000000, /* SPI_COMPUTE_INPUT_CNTL */
	0x00000000, /* SPI_COMPUTE_NUM_THREAD_X */
	0x00000000, /* SPI_COMPUTE_NUM_THREAD_Y */
	0x00000000, /* SPI_COMPUTE_NUM_THREAD_Z */
	0x00000000, /* SPI_GPR_MGMT */
	0x00000000, /* SPI_LDS_MGMT */
	0x00000000, /* SPI_STACK_MGMT */
	0x00000000, /* SPI_WAVE_MGMT_1 */
	0x00000000, /* SPI_WAVE_MGMT_2 */

	0xc0016900,
	0x000001e0,
	0x00000000, /* CB_BLEND0_CONTROL */

	0xc00e6900,
	0x00000200,
	0x00000000, /* DB_DEPTH_CONTROL */
	0x00000000, /* DB_EQAA */
	0x00cc0010, /* CB_COLOR_CONTROL */
	0x00000210, /* DB_SHADER_CONTROL */
	0x00010000, /* PA_CL_CLIP_CNTL */
	0x00000004, /* PA_SU_SC_MODE_CNTL */
	0x00000100, /* PA_CL_VTE_CNTL */
	0x00000000, /* PA_CL_VS_OUT_CNTL */
	0x00000000, /* PA_CL_NANINF_CNTL */
	0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */
	0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */
	0x00000000, /* PA_SU_PRIM_FILTER_CNTL */
	0x00000000, /*  */
	0x00000000, /*  */

	0xc0026900,
	0x00000229,
	0x00000000, /* SQ_PGM_START_FS */
	0x00000000,

	0xc0016900,
	0x0000023b,
	0x00000000, /* SQ_LDS_ALLOC_PS */

	0xc0066900,
	0x00000240,
	0x00000000, /* SQ_ESGS_RING_ITEMSIZE */
	0x00000000,
	0x00000000,
	0x00000000,
	0x00000000,
	0x00000000,

	0xc0046900,
	0x00000247,
	0x00000000, /* SQ_GS_VERT_ITEMSIZE */
	0x00000000,
	0x00000000,
	0x00000000,

	0xc0116900,
	0x00000280,
	0x00000000, /* PA_SU_POINT_SIZE */
	0x00000000, /* PA_SU_POINT_MINMAX */
	0x00000008, /* PA_SU_LINE_CNTL */
	0x00000000, /* PA_SC_LINE_STIPPLE */
	0x00000000, /* VGT_OUTPUT_PATH_CNTL */
	0x00000000, /* VGT_HOS_CNTL */
	0x00000000,
	0x00000000,
	0x00000000,
	0x00000000,
	0x00000000,
	0x00000000,
	0x00000000,
	0x00000000,
	0x00000000,
	0x00000000,
	0x00000000, /* VGT_GS_MODE */

	0xc0026900,
	0x00000292,
	0x00000000, /* PA_SC_MODE_CNTL_0 */
	0x00000000, /* PA_SC_MODE_CNTL_1 */

	0xc0016900,
	0x000002a1,
	0x00000000, /* VGT_PRIMITIVEID_EN */

	0xc0016900,
	0x000002a5,
	0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */

	0xc0026900,
	0x000002a8,
	0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */
	0x00000000,

	0xc0026900,
	0x000002ad,
	0x00000000, /* VGT_REUSE_OFF */
	0x00000000,

	0xc0016900,
	0x000002d5,
	0x00000000, /* VGT_SHADER_STAGES_EN */

	0xc0016900,
	0x000002dc,
	0x0000aa00, /* DB_ALPHA_TO_MASK */

	0xc0066900,
	0x000002de,
	0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */
	0x00000000,
	0x00000000,
	0x00000000,
	0x00000000,
	0x00000000,

	0xc0026900,
	0x000002e5,
	0x00000000, /* VGT_STRMOUT_CONFIG */
	0x00000000,

	0xc01b6900,
	0x000002f5,
	0x76543210, /* PA_SC_CENTROID_PRIORITY_0 */
	0xfedcba98, /* PA_SC_CENTROID_PRIORITY_1 */
	0x00000000, /* PA_SC_LINE_CNTL */
	0x00000000, /* PA_SC_AA_CONFIG */
	0x00000005, /* PA_SU_VTX_CNTL */
	0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */
	0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */
	0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */
	0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */
	0x00000000, /* PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
	0x00000000,
	0x00000000,
	0x00000000,
	0x00000000,
	0x00000000,
	0x00000000,
	0x00000000,
	0x00000000,
	0x00000000,
	0x00000000,
	0x00000000,
	0x00000000,
	0x00000000,
	0x00000000,
	0x00000000,
	0xffffffff, /* PA_SC_AA_MASK_X0Y0_X1Y0 */
	0xffffffff,

	0xc0026900,
	0x00000316,
	0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	0x00000010, /*  */
};

/*
 * Precompiled vertex shader used by the Cayman blit path, stored as raw
 * 32-bit words.  The words are opaque machine code: do not reorder or
 * reformat them without the matching shader source/ISA documentation.
 *
 * Two words differ between big- and little-endian kernel builds; in both
 * cases the big-endian variant only adds bit 17 (0x00020000).
 * NOTE(review): presumably this selects an endian-swap mode for the
 * vertex fetch -- confirm against the ISA documentation.
 */
const u32 cayman_vs[] =
{
	0x00000004,
	0x80400400,
	0x0000a03c,
	0x95000688,
	0x00004000,
	0x15000688,
	0x00000000,
	0x88000000,
	0x04000000,
	0x67961001,
	/* endian-dependent word: big-endian build sets bit 17 */
#ifdef __BIG_ENDIAN
	0x00020000,
#else
	0x00000000,
#endif
	0x00000000,
	0x04000000,
	0x67961000,
	/* endian-dependent word: big-endian build sets bit 17 */
#ifdef __BIG_ENDIAN
	0x00020008,
#else
	0x00000008,
#endif
	0x00000000,
};

/*
 * Precompiled "ps" shader used by the Cayman blit path (presumably the
 * pixel shader -- confirm), stored as raw 32-bit words.  The words are
 * opaque machine code: do not reorder or reformat them without the
 * matching shader source/ISA documentation.
 *
 * Unlike cayman_vs, this table has no endian-dependent words.
 */
const u32 cayman_ps[] =
{
	0x00000004,
	0xa00c0000,
	0x00000008,
	0x80400000,
	0x00000000,
	0x95000688,
	0x00000000,
	0x88000000,
	0x00380400,
	0x00146b10,
	0x00380000,
	0x20146b10,
	0x00380400,
	0x40146b00,
	0x80380000,
	0x60146b00,
	0x00000010,
	0x000d1000,
	0xb0800000,
	0x00000000,
};

/*
 * Entry counts (in u32 words, not bytes) of the tables above, exported so
 * other translation units can size their copies without seeing the array
 * definitions.
 */
const u32 cayman_default_size = ARRAY_SIZE(cayman_default_state);
const u32 cayman_vs_size = ARRAY_SIZE(cayman_vs);
const u32 cayman_ps_size = ARRAY_SIZE(cayman_ps);
+3 −0
Original line number Diff line number Diff line
@@ -25,8 +25,11 @@
#ifndef CAYMAN_BLIT_SHADERS_H
#define CAYMAN_BLIT_SHADERS_H

/* Blit tables for Cayman: default register state and the two shaders. */
extern const u32 cayman_default_state[];
extern const u32 cayman_vs[];
extern const u32 cayman_ps[];

/* Number of u32 entries in each of the tables above. */
extern const u32 cayman_default_size;
extern const u32 cayman_vs_size;
extern const u32 cayman_ps_size;

#endif /* CAYMAN_BLIT_SHADERS_H */
+265 −240
Original line number Diff line number Diff line
@@ -31,6 +31,7 @@

#include "evergreend.h"
#include "evergreen_blit_shaders.h"
#include "cayman_blit_shaders.h"

#define DI_PT_RECTLIST        0x11
#define DI_INDEX_SIZE_16_BIT  0x0
@@ -265,6 +266,11 @@ set_default_state(struct radeon_device *rdev)
	u64 gpu_addr;
	int dwords;

	/* set clear context state */
	radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(rdev, 0);

	if (rdev->family < CHIP_CAYMAN) {
		switch (rdev->family) {
		case CHIP_CEDAR:
		default:
@@ -474,10 +480,6 @@ set_default_state(struct radeon_device *rdev)
		sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) |
					    NUM_LS_STACK_ENTRIES(num_ls_stack_entries));

	/* set clear context state */
	radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(rdev, 0);

		/* disable dyn gprs */
		radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2);
@@ -497,6 +499,7 @@ set_default_state(struct radeon_device *rdev)
		radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
		radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
		radeon_ring_write(rdev, sq_stack_resource_mgmt_3);
	}

	/* CONTEXT_CONTROL */
	radeon_ring_write(rdev, 0xc0012800);
@@ -570,7 +573,10 @@ int evergreen_blit_init(struct radeon_device *rdev)
	mutex_init(&rdev->r600_blit.mutex);
	rdev->r600_blit.state_offset = 0;

	if (rdev->family < CHIP_CAYMAN)
		rdev->r600_blit.state_len = evergreen_default_size;
	else
		rdev->r600_blit.state_len = cayman_default_size;

	dwords = rdev->r600_blit.state_len;
	while (dwords & 0xf) {
@@ -582,11 +588,17 @@ int evergreen_blit_init(struct radeon_device *rdev)
	obj_size = ALIGN(obj_size, 256);

	rdev->r600_blit.vs_offset = obj_size;
	if (rdev->family < CHIP_CAYMAN)
		obj_size += evergreen_vs_size * 4;
	else
		obj_size += cayman_vs_size * 4;
	obj_size = ALIGN(obj_size, 256);

	rdev->r600_blit.ps_offset = obj_size;
	if (rdev->family < CHIP_CAYMAN)
		obj_size += evergreen_ps_size * 4;
	else
		obj_size += cayman_ps_size * 4;
	obj_size = ALIGN(obj_size, 256);

	r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
@@ -609,6 +621,7 @@ int evergreen_blit_init(struct radeon_device *rdev)
		return r;
	}

	if (rdev->family < CHIP_CAYMAN) {
		memcpy_toio(ptr + rdev->r600_blit.state_offset,
			    evergreen_default_state, rdev->r600_blit.state_len * 4);

@@ -619,6 +632,18 @@ int evergreen_blit_init(struct radeon_device *rdev)
			*(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]);
		for (i = 0; i < evergreen_ps_size; i++)
			*(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]);
	} else {
		memcpy_toio(ptr + rdev->r600_blit.state_offset,
			    cayman_default_state, rdev->r600_blit.state_len * 4);

		if (num_packet2s)
			memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
				    packet2s, num_packet2s * 4);
		for (i = 0; i < cayman_vs_size; i++)
			*(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(cayman_vs[i]);
		for (i = 0; i < cayman_ps_size; i++)
			*(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(cayman_ps[i]);
	}
	radeon_bo_kunmap(rdev->r600_blit.shader_obj);
	radeon_bo_unreserve(rdev->r600_blit.shader_obj);

+5 −8
Original line number Diff line number Diff line
@@ -1387,14 +1387,12 @@ static int cayman_startup(struct radeon_device *rdev)
		return r;
	cayman_gpu_init(rdev);

#if 0
	r = cayman_blit_init(rdev);
	r = evergreen_blit_init(rdev);
	if (r) {
		cayman_blit_fini(rdev);
		evergreen_blit_fini(rdev);
		rdev->asic->copy = NULL;
		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
	}
#endif

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
@@ -1452,7 +1450,7 @@ int cayman_resume(struct radeon_device *rdev)

int cayman_suspend(struct radeon_device *rdev)
{
	/* int r; */
	int r;

	/* FIXME: we should wait for ring to be empty */
	cayman_cp_enable(rdev, false);
@@ -1461,14 +1459,13 @@ int cayman_suspend(struct radeon_device *rdev)
	radeon_wb_disable(rdev);
	cayman_pcie_gart_disable(rdev);

#if 0
	/* unpin shaders bo */
	r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
	if (likely(r == 0)) {
		radeon_bo_unpin(rdev->r600_blit.shader_obj);
		radeon_bo_unreserve(rdev->r600_blit.shader_obj);
	}
#endif

	return 0;
}

@@ -1580,7 +1577,7 @@ int cayman_init(struct radeon_device *rdev)

void cayman_fini(struct radeon_device *rdev)
{
	/* cayman_blit_fini(rdev); */
	evergreen_blit_fini(rdev);
	cayman_cp_fini(rdev);
	r600_irq_fini(rdev);
	radeon_wb_fini(rdev);
+3 −3
Original line number Diff line number Diff line
@@ -906,9 +906,9 @@ static struct radeon_asic cayman_asic = {
	.get_vblank_counter = &evergreen_get_vblank_counter,
	.fence_ring_emit = &r600_fence_ring_emit,
	.cs_parse = &evergreen_cs_parse,
	.copy_blit = NULL,
	.copy_dma = NULL,
	.copy = NULL,
	.copy_blit = &evergreen_copy_blit,
	.copy_dma = &evergreen_copy_blit,
	.copy = &evergreen_copy_blit,
	.get_engine_clock = &radeon_atom_get_engine_clock,
	.set_engine_clock = &radeon_atom_set_engine_clock,
	.get_memory_clock = &radeon_atom_get_memory_clock,