Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 5f6474a4 authored by Ben Skeggs
Browse files

drm/nouveau/gr/gf100-: port tile mapping calculations from NVGPU



There's also a couple of hardcoded tables for a couple of very specific
configurations that NVGPU's algorithm didn't work for.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
parent 5c05a589
Loading
Loading
Loading
Loading
+5 −18
Original line number Original line Diff line number Diff line
@@ -1116,27 +1116,14 @@ gf100_grctx_generate_rop_mapping(struct gf100_gr *gr)
{
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	struct nvkm_device *device = gr->base.engine.subdev.device;
	u32 data[6] = {}, data2[2] = {};
	u32 data[6] = {}, data2[2] = {};
	u8  tpcnr[GPC_MAX];
	u8  shift, ntpcv;
	u8  shift, ntpcv;
	int gpc, tpc, i;
	int i;

	/* calculate first set of magics */
	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));

	gpc = -1;
	for (tpc = 0; tpc < gr->tpc_total; tpc++) {
		do {
			gpc = (gpc + 1) % gr->gpc_nr;
		} while (!tpcnr[gpc]);
		tpcnr[gpc]--;

		data[tpc / 6] |= gpc << ((tpc % 6) * 5);
	}


	for (; tpc < 32; tpc++)
	/* Pack tile map into register format. */
		data[tpc / 6] |= 7 << ((tpc % 6) * 5);
	for (i = 0; i < 32; i++)
		data[i / 6] |= (gr->tile[i] & 0x07) << ((i % 6) * 5);


	/* and the second... */
	/* Magic. */
	shift = 0;
	shift = 0;
	ntpcv = gr->tpc_total;
	ntpcv = gr->tpc_total;
	while (!(ntpcv & (1 << 4))) {
	while (!(ntpcv & (1 << 4))) {
+5 −18
Original line number Original line Diff line number Diff line
@@ -194,27 +194,14 @@ gf117_grctx_generate_rop_mapping(struct gf100_gr *gr)
{
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	struct nvkm_device *device = gr->base.engine.subdev.device;
	u32 data[6] = {}, data2[2] = {};
	u32 data[6] = {}, data2[2] = {};
	u8  tpcnr[GPC_MAX];
	u8  shift, ntpcv;
	u8  shift, ntpcv;
	int gpc, tpc, i;
	int i;

	/* calculate first set of magics */
	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));

	gpc = -1;
	for (tpc = 0; tpc < gr->tpc_total; tpc++) {
		do {
			gpc = (gpc + 1) % gr->gpc_nr;
		} while (!tpcnr[gpc]);
		tpcnr[gpc]--;

		data[tpc / 6] |= gpc << ((tpc % 6) * 5);
	}


	for (; tpc < 32; tpc++)
	/* Pack tile map into register format. */
		data[tpc / 6] |= 7 << ((tpc % 6) * 5);
	for (i = 0; i < 32; i++)
		data[i / 6] |= (gr->tile[i] & 0x07) << ((i % 6) * 5);


	/* and the second... */
	/* Magic. */
	shift = 0;
	shift = 0;
	ntpcv = gr->tpc_total;
	ntpcv = gr->tpc_total;
	while (!(ntpcv & (1 << 4))) {
	while (!(ntpcv & (1 << 4))) {
+79 −39
Original line number Original line Diff line number Diff line
@@ -1652,6 +1652,82 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr)
	return ret;
	return ret;
}
}


/*
 * Compute the screen tile row offset and the tile->GPC map for @gr.
 *
 * Reads gr->gpc_nr, gr->tpc_nr[], gr->tpc_max and gr->tpc_total.
 * Writes gr->screen_tile_row_offset and gr->tile[0 .. tpc_total - 1].
 */
void
gf100_gr_oneinit_tiles(struct gf100_gr *gr)
{
	static const u8 primes[] = {
		3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61
	};
	int init_frac[GPC_MAX], init_err[GPC_MAX], run_err[GPC_MAX], i, j;
	/*
	 * These must be signed.  run_err[] legitimately goes negative
	 * (right after a tile is emitted for a GPC with fewer than half of
	 * tpc_max TPCs); if comm_denom were unsigned, the comparison against
	 * it below would be performed unsigned and a negative error term
	 * would compare as a huge value, emitting a tile too early.
	 */
	int mul_factor, comm_denom;
	u8  gpc_map[GPC_MAX];
	bool sorted;

	/*
	 * Hard-coded row offsets for specific TPC totals; for any other
	 * total, use the first listed prime that does not divide it
	 * (falling back to 3 if every listed prime divides it).
	 */
	switch (gr->tpc_total) {
	case 15: gr->screen_tile_row_offset = 0x06; break;
	case 14: gr->screen_tile_row_offset = 0x05; break;
	case 13: gr->screen_tile_row_offset = 0x02; break;
	case 11: gr->screen_tile_row_offset = 0x07; break;
	case 10: gr->screen_tile_row_offset = 0x06; break;
	case  7:
	case  5: gr->screen_tile_row_offset = 0x01; break;
	case  3: gr->screen_tile_row_offset = 0x02; break;
	case  2:
	case  1: gr->screen_tile_row_offset = 0x01; break;
	default: gr->screen_tile_row_offset = 0x03;
		for (i = 0; i < ARRAY_SIZE(primes); i++) {
			if (gr->tpc_total % primes[i]) {
				gr->screen_tile_row_offset = primes[i];
				break;
			}
		}
		break;
	}

	/* Sort GPCs by TPC count, highest-to-lowest. */
	for (i = 0; i < gr->gpc_nr; i++)
		gpc_map[i] = i;
	sorted = false;

	/* Bubble sort on the index map; repeat until a pass swaps nothing. */
	while (!sorted) {
		for (sorted = true, i = 0; i < gr->gpc_nr - 1; i++) {
			if (gr->tpc_nr[gpc_map[i + 1]] >
			    gr->tpc_nr[gpc_map[i + 0]]) {
				u8 swap = gpc_map[i];
				gpc_map[i + 0] = gpc_map[i + 1];
				gpc_map[i + 1] = swap;
				sorted = false;
			}
		}
	}

	/* Determine tile->GPC mapping */

	/* Scale by two when the product is odd so comm_denom / 2 is exact. */
	mul_factor = gr->gpc_nr * gr->tpc_max;
	if (mul_factor & 1)
		mul_factor = 2;
	else
		mul_factor = 1;

	comm_denom = gr->gpc_nr * gr->tpc_max * mul_factor;

	/*
	 * Per sorted GPC: init_frac is the amount added to its error term on
	 * every pass, init_err staggers the starting phase by sorted position.
	 */
	for (i = 0; i < gr->gpc_nr; i++) {
		init_frac[i] = gr->tpc_nr[gpc_map[i]] * gr->gpc_nr * mul_factor;
		 init_err[i] = i * gr->tpc_max * mul_factor - comm_denom/2;
		  run_err[i] = init_frac[i] + init_err[i];
	}

	/*
	 * Walk the GPCs repeatedly; a GPC receives the next tile whenever its
	 * running error reaches half of comm_denom.  The extra bound on i
	 * keeps writes to gr->tile[] within the first tpc_total entries.
	 */
	for (i = 0; i < gr->tpc_total;) {
		for (j = 0; j < gr->gpc_nr && i < gr->tpc_total; j++) {
			if ((run_err[j] * 2) >= comm_denom) {
				gr->tile[i++] = gpc_map[j];
				run_err[j] += init_frac[j] - comm_denom;
			} else {
				run_err[j] += init_frac[j];
			}
		}
	}
}

static int
static int
gf100_gr_oneinit(struct nvkm_gr *base)
gf100_gr_oneinit(struct nvkm_gr *base)
{
{
@@ -1691,45 +1767,8 @@ gf100_gr_oneinit(struct nvkm_gr *base)
		}
		}
	}
	}


	/*XXX: these need figuring out... though it might not even matter */
	memset(gr->tile, 0xff, sizeof(gr->tile));
	switch (device->chipset) {
	gr->func->oneinit_tiles(gr);
	case 0xc0:
		if (gr->tpc_total == 11) { /* 465, 3/4/4/0, 4 */
			gr->screen_tile_row_offset = 0x07;
		} else
		if (gr->tpc_total == 14) { /* 470, 3/3/4/4, 5 */
			gr->screen_tile_row_offset = 0x05;
		} else
		if (gr->tpc_total == 15) { /* 480, 3/4/4/4, 6 */
			gr->screen_tile_row_offset = 0x06;
		}
		break;
	case 0xc3: /* 450, 4/0/0/0, 2 */
		gr->screen_tile_row_offset = 0x03;
		break;
	case 0xc4: /* 460, 3/4/0/0, 4 */
		gr->screen_tile_row_offset = 0x01;
		break;
	case 0xc1: /* 2/0/0/0, 1 */
		gr->screen_tile_row_offset = 0x01;
		break;
	case 0xc8: /* 4/4/3/4, 5 */
		gr->screen_tile_row_offset = 0x06;
		break;
	case 0xce: /* 4/4/0/0, 4 */
		gr->screen_tile_row_offset = 0x03;
		break;
	case 0xcf: /* 4/0/0/0, 3 */
		gr->screen_tile_row_offset = 0x03;
		break;
	case 0xd7:
	case 0xd9: /* 1/0/0/0, 1 */
	case 0xea: /* gk20a */
	case 0x12b: /* gm20b */
		gr->screen_tile_row_offset = 0x01;
		break;
	}

	return 0;
	return 0;
}
}


@@ -2164,6 +2203,7 @@ gf100_gr_gpccs_ucode = {


static const struct gf100_gr_func
static const struct gf100_gr_func
gf100_gr = {
gf100_gr = {
	.oneinit_tiles = gf100_gr_oneinit_tiles,
	.init = gf100_gr_init,
	.init = gf100_gr_init,
	.init_gpc_mmu = gf100_gr_init_gpc_mmu,
	.init_gpc_mmu = gf100_gr_init_gpc_mmu,
	.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
	.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
+6 −2
Original line number Original line Diff line number Diff line
@@ -107,12 +107,13 @@ struct gf100_gr {
	u8 ppc_tpc_nr[GPC_MAX][4];
	u8 ppc_tpc_nr[GPC_MAX][4];
	u8 ppc_tpc_min;
	u8 ppc_tpc_min;


	u8 screen_tile_row_offset;
	u8 tile[TPC_MAX];

	struct gf100_gr_data mmio_data[4];
	struct gf100_gr_data mmio_data[4];
	struct gf100_gr_mmio mmio_list[4096/8];
	struct gf100_gr_mmio mmio_list[4096/8];
	u32  size;
	u32  size;
	u32 *data;
	u32 *data;

	u8 screen_tile_row_offset;
};
};


int gf100_gr_ctor(const struct gf100_gr_func *, struct nvkm_device *,
int gf100_gr_ctor(const struct gf100_gr_func *, struct nvkm_device *,
@@ -123,6 +124,7 @@ void *gf100_gr_dtor(struct nvkm_gr *);


struct gf100_gr_func {
struct gf100_gr_func {
	void (*dtor)(struct gf100_gr *);
	void (*dtor)(struct gf100_gr *);
	void (*oneinit_tiles)(struct gf100_gr *);
	int (*init)(struct gf100_gr *);
	int (*init)(struct gf100_gr *);
	void (*init_gpc_mmu)(struct gf100_gr *);
	void (*init_gpc_mmu)(struct gf100_gr *);
	void (*init_r405a14)(struct gf100_gr *);
	void (*init_r405a14)(struct gf100_gr *);
@@ -164,6 +166,7 @@ struct gf100_gr_func {
};
};


int gf100_gr_rops(struct gf100_gr *);
int gf100_gr_rops(struct gf100_gr *);
void gf100_gr_oneinit_tiles(struct gf100_gr *);
int gf100_gr_init(struct gf100_gr *);
int gf100_gr_init(struct gf100_gr *);
void gf100_gr_init_vsc_stream_master(struct gf100_gr *);
void gf100_gr_init_vsc_stream_master(struct gf100_gr *);
void gf100_gr_init_zcull(struct gf100_gr *);
void gf100_gr_init_zcull(struct gf100_gr *);
@@ -191,6 +194,7 @@ void gm107_gr_init_400054(struct gf100_gr *);


int gk20a_gr_init(struct gf100_gr *);
int gk20a_gr_init(struct gf100_gr *);


void gm200_gr_oneinit_tiles(struct gf100_gr *);
int gm200_gr_rops(struct gf100_gr *);
int gm200_gr_rops(struct gf100_gr *);
void gm200_gr_init_num_active_ltcs(struct gf100_gr *);
void gm200_gr_init_num_active_ltcs(struct gf100_gr *);
void gm200_gr_init_ds_hww_esr_2(struct gf100_gr *);
void gm200_gr_init_ds_hww_esr_2(struct gf100_gr *);
+1 −0
Original line number Original line Diff line number Diff line
@@ -114,6 +114,7 @@ gf104_gr_pack_mmio[] = {


static const struct gf100_gr_func
static const struct gf100_gr_func
gf104_gr = {
gf104_gr = {
	.oneinit_tiles = gf100_gr_oneinit_tiles,
	.init = gf100_gr_init,
	.init = gf100_gr_init,
	.init_gpc_mmu = gf100_gr_init_gpc_mmu,
	.init_gpc_mmu = gf100_gr_init_gpc_mmu,
	.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
	.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
Loading