drm/nvc0-/gr: generate grctx template at init time, not first context ctor (ac1499d9) · Commits · e / devices / android_kernel_xiaomi_markw

drivers/gpu/drm/nouveau/core/engine/graph/ctxnvc0.c

+229 −40

Original line number	Diff line number	Diff line
		@@ -35,6 +35,156 @@ nv_icmd(struct drm_device *priv, u32 icmd, u32 data)
		while (nv_rd32(priv, 0x400700) & 2) {}
		}

		int
		nvc0_grctx_init(struct drm_device priv, struct nvc0_graph_priv oprv,
		struct nvc0_grctx *info)
		{
		struct nouveau_gpuobj *chan;
		u32 size = (0x80000 + oprv->size + 4095) & ~4095;
		int ret, i;

		/* allocate memory to for a "channel", which we'll use to generate
		* the default context values
		*/
		ret = nouveau_gpuobj_new(priv, NULL, size, 0x1000,
		NVOBJ_FLAG_ZERO_ALLOC, &info->chan);
		chan = info->chan;
		if (ret) {
		NV_ERROR(priv, "failed to allocate channel memory, %d\n", ret);
		return ret;
		}

		/* PGD pointer */
		nv_wo32(chan, 0x0200, lower_32_bits(chan->addr + 0x1000));
		nv_wo32(chan, 0x0204, upper_32_bits(chan->addr + 0x1000));
		nv_wo32(chan, 0x0208, 0xffffffff);
		nv_wo32(chan, 0x020c, 0x000000ff);

		/* PGT[0] pointer */
		nv_wo32(chan, 0x1000, 0x00000000);
		nv_wo32(chan, 0x1004, 0x00000001 \| (chan->addr + 0x2000) >> 8);

		/* identity-map the whole "channel" into its own vm */
		for (i = 0; i < size / 4096; i++) {
		u64 addr = ((chan->addr + (i * 4096)) >> 8) \| 1;
		nv_wo32(chan, 0x2000 + (i * 8), lower_32_bits(addr));
		nv_wo32(chan, 0x2004 + (i * 8), upper_32_bits(addr));
		}

		/* context pointer (virt) */
		nv_wo32(chan, 0x0210, 0x00080004);
		nv_wo32(chan, 0x0214, 0x00000000);

		nvimem_flush(priv);

		nv_wr32(priv, 0x100cb8, (chan->addr + 0x1000) >> 8);
		nv_wr32(priv, 0x100cbc, 0x80000001);
		nv_wait(priv, 0x100c80, 0x00008000, 0x00008000);

		/* setup default state for mmio list construction */
		info->dev = priv;
		info->data = oprv->mmio_data;
		info->mmio = oprv->mmio_list;
		info->addr = 0x2000 + (i * 8);
		info->priv = oprv;
		info->buffer_nr = 0;

		if (oprv->firmware) {
		nv_wr32(priv, 0x409840, 0x00000030);
		nv_wr32(priv, 0x409500, 0x80000000 \| chan->addr >> 12);
		nv_wr32(priv, 0x409504, 0x00000003);
		if (!nv_wait(priv, 0x409800, 0x00000010, 0x00000010))
		NV_ERROR(priv, "load_ctx timeout\n");

		nv_wo32(chan, 0x8001c, 1);
		nv_wo32(chan, 0x80020, 0);
		nv_wo32(chan, 0x80028, 0);
		nv_wo32(chan, 0x8002c, 0);
		nvimem_flush(priv);
		return 0;
		}

		/* HUB_FUC(SET_CHAN) */
		nv_wr32(priv, 0x409840, 0x80000000);
		nv_wr32(priv, 0x409500, 0x80000000 \| chan->addr >> 12);
		nv_wr32(priv, 0x409504, 0x00000001);
		if (!nv_wait(priv, 0x409800, 0x80000000, 0x80000000)) {
		NV_ERROR(priv, "HUB_SET_CHAN timeout\n");
		nvc0_graph_ctxctl_debug(priv);
		nouveau_gpuobj_ref(NULL, &info->chan);
		return -EBUSY;
		}

		return 0;
		}

		/*
		 * Reserve the next data buffer for the context being generated.
		 *
		 * The running address in @info is rounded up to @align, stored as the
		 * start of buffer slot info->buffer_nr, and advanced past @size bytes.
		 * The size/alignment/access triple is written to the next entry of the
		 * info->data list.
		 */
		void
		nvc0_grctx_data(struct nvc0_grctx *info, u32 size, u32 align, u32 access)
		{
			struct nvc0_graph_data *rec = info->data++;

			/* align the running address up and claim it as this buffer's start */
			info->buffer[info->buffer_nr] = info->addr + (align - 1);
			info->buffer[info->buffer_nr] &= ~(align - 1);

			/* the next buffer begins right after this one */
			info->addr = info->buffer[info->buffer_nr] + size;
			info->buffer_nr++;

			rec->size = size;
			rec->align = align;
			rec->access = access;
		}

		/*
		 * Append one register write to the mmio list and apply it immediately.
		 *
		 * The (addr, data, shift, buf) tuple is stored unmodified in the next
		 * info->mmio entry.  For the immediate write, a non-zero @shift means the
		 * address of data buffer @buf, shifted right by @shift, is OR'd into
		 * @data first.
		 */
		void
		nvc0_grctx_mmio(struct nvc0_grctx *info, u32 addr, u32 data, u32 shift, u32 buf)
		{
			info->mmio->addr = addr;
			info->mmio->data = data;
			info->mmio->shift = shift;
			info->mmio->buffer = buf;
			info->mmio++;

			/* shift == 0 marks a plain value with no buffer address to patch in */
			if (shift)
				data |= info->buffer[buf] >> shift;
			nv_wr32(info->dev, addr, data);
		}

		int
		nvc0_grctx_fini(struct nvc0_grctx *info)
		{
		struct nvc0_graph_priv *priv = info->priv;
		int i;

		if (priv->firmware) {
		nv_wr32(info->dev, 0x409840, 0x00000003);
		nv_wr32(info->dev, 0x409500, 0x80000000 \| info->chan->addr >> 12);
		nv_wr32(info->dev, 0x409504, 0x00000009);
		if (!nv_wait(info->dev, 0x409800, 0x00000001, 0x00000000)) {
		NV_ERROR(info->dev, "unload_ctx timeout\n");
		return -EBUSY;
		}

		goto save;
		}

		/* HUB_FUC(CTX_SAVE) */
		nv_wr32(info->dev, 0x409840, 0x80000000);
		nv_wr32(info->dev, 0x409500, 0x80000000 \| info->chan->addr >> 12);
		nv_wr32(info->dev, 0x409504, 0x00000002);
		if (!nv_wait(info->dev, 0x409800, 0x80000000, 0x80000000)) {
		NV_ERROR(info->dev, "HUB_CTX_SAVE timeout\n");
		nvc0_graph_ctxctl_debug(info->dev);
		return -EBUSY;
		}

		save:
		priv->data = kmalloc(priv->size, GFP_KERNEL);
		if (priv->data) {
		for (i = 0; i < priv->size; i += 4)
		priv->data[i / 4] = nv_ro32(info->chan, 0x80000 + i);
		}

		nouveau_gpuobj_ref(NULL, &info->chan);
		return priv->data ? 0 : -ENOMEM;
		}

		static void
		nvc0_grctx_generate_9097(struct drm_device *priv)
		{
		@@ -1779,16 +1929,19 @@ nvc0_grctx_generate_tp(struct drm_device *priv)
		}

		int
		nvc0_grctx_generate(struct nouveau_channel *chan)
		nvc0_grctx_generate(struct drm_device *priv)
		{
		struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
		struct nvc0_graph_priv *oprv = nv_engine(chan->dev, NVOBJ_ENGINE_GR);
		struct nvc0_graph_chan *grch = chan->engctx[NVOBJ_ENGINE_GR];
		struct drm_device *priv = chan->dev;
		int i, gpc, tp, id;
		struct drm_nouveau_private *dev_priv = priv->dev_private;
		struct nvc0_graph_priv *oprv = nv_engine(priv, NVOBJ_ENGINE_GR);
		struct nvc0_grctx info;
		int ret, i, gpc, tpc, id;
		u32 fermi = nvc0_graph_class(priv);
		u32 r000260, tmp;

		ret = nvc0_grctx_init(priv, oprv, &info);
		if (ret)
		return ret;

		r000260 = nv_rd32(priv, 0x000260);
		nv_wr32(priv, 0x000260, r000260 & ~1);
		nv_wr32(priv, 0x400208, 0x00000000);
		@@ -1808,19 +1961,55 @@ nvc0_grctx_generate(struct nouveau_channel *chan)

		nv_wr32(priv, 0x404154, 0x00000000);

		/* fuc "mmio list" writes */
		for (i = 0; i < grch->mmio_nr * 8; i += 8) {
		u32 reg = nv_ro32(grch->mmio, i + 0);
		nv_wr32(priv, reg, nv_ro32(grch->mmio, i + 4));
		/* generate per-context mmio list data */
		mmio_data(0x002000, 0x0100, NV_MEM_ACCESS_RW \| NV_MEM_ACCESS_SYS);
		mmio_data(0x008000, 0x0100, NV_MEM_ACCESS_RW \| NV_MEM_ACCESS_SYS);
		mmio_data(0x060000, 0x1000, NV_MEM_ACCESS_RW);
		mmio_list(0x408004, 0x00000000, 8, 0);
		mmio_list(0x408008, 0x80000018, 0, 0);
		mmio_list(0x40800c, 0x00000000, 8, 1);
		mmio_list(0x408010, 0x80000000, 0, 0);
		mmio_list(0x418810, 0x80000000, 12, 2);
		mmio_list(0x419848, 0x10000000, 12, 2);
		mmio_list(0x419004, 0x00000000, 8, 1);
		mmio_list(0x419008, 0x00000000, 0, 0);
		mmio_list(0x418808, 0x00000000, 8, 0);
		mmio_list(0x41880c, 0x80000018, 0, 0);
		if (dev_priv->chipset != 0xc1) {
		tmp = 0x02180000;
		mmio_list(0x405830, tmp, 0, 0);
		for (gpc = 0; gpc < oprv->gpc_nr; gpc++) {
		for (tpc = 0; tpc < oprv->tpc_nr[gpc]; tpc++) {
		u32 reg = TPC_UNIT(gpc, tpc, 0x0520);
		mmio_list(reg, tmp, 0, 0);
		tmp += 0x0324;
		}
		}
		} else {
		tmp = 0x02180000;
		mmio_list(0x405830, 0x00000218 \| tmp, 0, 0);
		mmio_list(0x4064c4, 0x0086ffff, 0, 0);
		for (gpc = 0; gpc < oprv->gpc_nr; gpc++) {
		for (tpc = 0; tpc < oprv->tpc_nr[gpc]; tpc++) {
		u32 reg = TPC_UNIT(gpc, tpc, 0x0520);
		mmio_list(reg, 0x10000000 \| tmp, 0, 0);
		tmp += 0x0324;
		}
		for (tpc = 0; tpc < oprv->tpc_nr[gpc]; tpc++) {
		u32 reg = TPC_UNIT(gpc, tpc, 0x0544);
		mmio_list(reg, tmp, 0, 0);
		tmp += 0x0324;
		}
		}
		}

		for (tp = 0, id = 0; tp < 4; tp++) {
		for (tpc = 0, id = 0; tpc < 4; tpc++) {
		for (gpc = 0; gpc < oprv->gpc_nr; gpc++) {
		if (tp < oprv->tpc_nr[gpc]) {
		nv_wr32(priv, TPC_UNIT(gpc, tp, 0x698), id);
		nv_wr32(priv, TPC_UNIT(gpc, tp, 0x4e8), id);
		nv_wr32(priv, GPC_UNIT(gpc, 0x0c10 + tp * 4), id);
		nv_wr32(priv, TPC_UNIT(gpc, tp, 0x088), id);
		if (tpc < oprv->tpc_nr[gpc]) {
		nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x698), id);
		nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x4e8), id);
		nv_wr32(priv, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id);
		nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x088), id);
		id++;
		}

		@@ -1843,18 +2032,18 @@ nvc0_grctx_generate(struct nouveau_channel *chan)
		nv_wr32(priv, 0x40587c, 0x00000000);

		if (1) {
		u8 tpnr[GPC_MAX], data[TPC_MAX];
		u8 tpcnr[GPC_MAX], data[TPC_MAX];

		memcpy(tpnr, oprv->tpc_nr, sizeof(oprv->tpc_nr));
		memcpy(tpcnr, oprv->tpc_nr, sizeof(oprv->tpc_nr));
		memset(data, 0x1f, sizeof(data));

		gpc = -1;
		for (tp = 0; tp < oprv->tpc_total; tp++) {
		for (tpc = 0; tpc < oprv->tpc_total; tpc++) {
		do {
		gpc = (gpc + 1) % oprv->gpc_nr;
		} while (!tpnr[gpc]);
		tpnr[gpc]--;
		data[tp] = gpc;
		} while (!tpcnr[gpc]);
		tpcnr[gpc]--;
		data[tpc] = gpc;
		}

		for (i = 0; i < 4; i++)
		@@ -1863,24 +2052,24 @@ nvc0_grctx_generate(struct nouveau_channel *chan)

		if (1) {
		u32 data[6] = {}, data2[2] = {};
		u8 tpnr[GPC_MAX];
		u8 tpcnr[GPC_MAX];
		u8 shift, ntpcv;

		/* calculate first set of magics */
		memcpy(tpnr, oprv->tpc_nr, sizeof(oprv->tpc_nr));
		memcpy(tpcnr, oprv->tpc_nr, sizeof(oprv->tpc_nr));

		gpc = -1;
		for (tp = 0; tp < oprv->tpc_total; tp++) {
		for (tpc = 0; tpc < oprv->tpc_total; tpc++) {
		do {
		gpc = (gpc + 1) % oprv->gpc_nr;
		} while (!tpnr[gpc]);
		tpnr[gpc]--;
		} while (!tpcnr[gpc]);
		tpcnr[gpc]--;

		data[tp / 6] \|= gpc << ((tp % 6) * 5);
		data[tpc / 6] \|= gpc << ((tpc % 6) * 5);
		}

		for (; tp < 32; tp++)
		data[tp / 6] \|= 7 << ((tp % 6) * 5);
		for (; tpc < 32; tpc++)
		data[tpc / 6] \|= 7 << ((tpc % 6) * 5);

		/* and the second... */
		shift = 0;
		@@ -1918,12 +2107,12 @@ nvc0_grctx_generate(struct nouveau_channel *chan)
		}

		if (1) {
		u32 tp_mask = 0, tp_set = 0;
		u8 tpnr[GPC_MAX], a, b;
		u32 tpc_mask = 0, tpc_set = 0;
		u8 tpcnr[GPC_MAX], a, b;

		memcpy(tpnr, oprv->tpc_nr, sizeof(oprv->tpc_nr));
		memcpy(tpcnr, oprv->tpc_nr, sizeof(oprv->tpc_nr));
		for (gpc = 0; gpc < oprv->gpc_nr; gpc++)
		tp_mask \|= ((1 << oprv->tpc_nr[gpc]) - 1) << (gpc * 8);
		tpc_mask \|= ((1 << oprv->tpc_nr[gpc]) - 1) << (gpc * 8);

		for (i = 0, gpc = -1, b = -1; i < 32; i++) {
		a = (i * (oprv->tpc_total - 1)) / 32;
		@@ -1931,14 +2120,14 @@ nvc0_grctx_generate(struct nouveau_channel *chan)
		b = a;
		do {
		gpc = (gpc + 1) % oprv->gpc_nr;
		} while (!tpnr[gpc]);
		tp = oprv->tpc_nr[gpc] - tpnr[gpc]--;
		} while (!tpcnr[gpc]);
		tpc = oprv->tpc_nr[gpc] - tpcnr[gpc]--;

		tp_set \|= 1 << ((gpc * 8) + tp);
		tpc_set \|= 1 << ((gpc * 8) + tpc);
		}

		nv_wr32(priv, 0x406800 + (i * 0x20), tp_set);
		nv_wr32(priv, 0x406c00 + (i * 0x20), tp_set ^ tp_mask);
		nv_wr32(priv, 0x406800 + (i * 0x20), tpc_set);
		nv_wr32(priv, 0x406c00 + (i * 0x20), tpc_set ^ tpc_mask);
		}
		}

		@@ -2867,5 +3056,5 @@ nvc0_grctx_generate(struct nouveau_channel *chan)
		nvc0_grctx_generate_90c0(priv);

		nv_wr32(priv, 0x000260, r000260);
		return 0;
		return nvc0_grctx_fini(&info);
		}

drivers/gpu/drm/nouveau/core/engine/graph/ctxnve0.c

+40 −10

Original line number	Diff line number	Diff line
		@@ -2604,16 +2604,20 @@ nve0_graph_generate_tpcunk(struct drm_device *priv)
		}

		int
		nve0_grctx_generate(struct nouveau_channel *chan)
		nve0_grctx_generate(struct drm_device *priv)
		{
		struct nvc0_graph_priv *oprv = nv_engine(chan->dev, NVOBJ_ENGINE_GR);
		struct nvc0_graph_chan *grch = chan->engctx[NVOBJ_ENGINE_GR];
		struct drm_device *priv = chan->dev;
		struct nvc0_graph_priv *oprv = nv_engine(priv, NVOBJ_ENGINE_GR);
		struct nvc0_grctx info;
		int ret, i, gpc, tpc, id;
		u32 data[6] = {}, data2[2] = {}, tmp;
		u32 tpc_set = 0, tpc_mask = 0;
		u32 magic[GPC_MAX][2], offset;
		u8 tpcnr[GPC_MAX], a, b;
		u8 shift, ntpcv;
		int i, gpc, tpc, id;

		ret = nvc0_grctx_init(priv, oprv, &info);
		if (ret)
		return ret;

		nv_mask(priv, 0x000260, 0x00000001, 0x00000000);
		nv_wr32(priv, 0x400204, 0x00000000);
		@@ -2636,11 +2640,37 @@ nve0_grctx_generate(struct nouveau_channel *chan)

		nv_wr32(priv, 0x404154, 0x0);

		for (i = 0; i < grch->mmio_nr * 8; i += 8) {
		u32 reg = nv_ro32(grch->mmio, i + 0);
		u32 val = nv_ro32(grch->mmio, i + 4);
		nv_wr32(priv, reg, val);
		mmio_data(0x003000, 0x0100, NV_MEM_ACCESS_RW \| NV_MEM_ACCESS_SYS);
		mmio_data(0x008000, 0x0100, NV_MEM_ACCESS_RW \| NV_MEM_ACCESS_SYS);
		mmio_data(0x060000, 0x1000, NV_MEM_ACCESS_RW);
		mmio_list(0x40800c, 0x00000000, 8, 1);
		mmio_list(0x408010, 0x80000000, 0, 0);
		mmio_list(0x419004, 0x00000000, 8, 1);
		mmio_list(0x419008, 0x00000000, 0, 0);
		mmio_list(0x4064cc, 0x80000000, 0, 0);
		mmio_list(0x408004, 0x00000000, 8, 0);
		mmio_list(0x408008, 0x80000030, 0, 0);
		mmio_list(0x418808, 0x00000000, 8, 0);
		mmio_list(0x41880c, 0x80000030, 0, 0);
		mmio_list(0x4064c8, 0x01800600, 0, 0);
		mmio_list(0x418810, 0x80000000, 12, 2);
		mmio_list(0x419848, 0x10000000, 12, 2);
		mmio_list(0x405830, 0x02180648, 0, 0);
		mmio_list(0x4064c4, 0x0192ffff, 0, 0);
		for (gpc = 0, offset = 0; gpc < oprv->gpc_nr; gpc++) {
		u16 magic0 = 0x0218 * oprv->tpc_nr[gpc];
		u16 magic1 = 0x0648 * oprv->tpc_nr[gpc];
		magic[gpc][0] = 0x10000000 \| (magic0 << 16) \| offset;
		magic[gpc][1] = 0x00000000 \| (magic1 << 16);
		offset += 0x0324 * oprv->tpc_nr[gpc];
		}
		for (gpc = 0; gpc < oprv->gpc_nr; gpc++) {
		mmio_list(GPC_UNIT(gpc, 0x30c0), magic[gpc][0], 0, 0);
		mmio_list(GPC_UNIT(gpc, 0x30e4), magic[gpc][1] \| offset, 0, 0);
		offset += 0x07ff * oprv->tpc_nr[gpc];
		}
		mmio_list(0x17e91c, 0x06060609, 0, 0);
		mmio_list(0x17e920, 0x00090a05, 0, 0);

		nv_wr32(priv, 0x418c6c, 0x1);
		nv_wr32(priv, 0x41980c, 0x10);
		@@ -2758,5 +2788,5 @@ nve0_grctx_generate(struct nouveau_channel *chan)
		nv_mask(priv, 0x000260, 0x00000001, 0x00000001);
		nv_wr32(priv, 0x418800, 0x7026860a); //XXX
		nv_wr32(priv, 0x41be10, 0x00bb8bc7); //XXX
		return 0;
		return nvc0_grctx_fini(&info);
		}

drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c

+150 −300

File changed.

Preview size limit exceeded, changes collapsed.

drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h

+19 −11

Original line number	Diff line number	Diff line
		@@ -66,27 +66,28 @@ struct nvc0_graph_priv {
		u8 tpc_nr[GPC_MAX];
		u8 tpc_total;

		u32 grctx_size;
		u32 *grctx_vals;
		struct nouveau_gpuobj *unk4188b4;
		struct nouveau_gpuobj *unk4188b8;

		struct nvc0_graph_data mmio_data[4];
		struct nvc0_graph_mmio mmio_list[4096/8];
		u32 size;
		u32 *data;

		u8 magic_not_rop_nr;
		};

		struct nvc0_graph_chan {
		struct nouveau_gpuobj *grctx;
		struct nouveau_vma grctx_vma;
		struct nouveau_gpuobj unk408004; / 0x418808 too */
		struct nouveau_vma unk408004_vma;
		struct nouveau_gpuobj unk40800c; / 0x419004 too */
		struct nouveau_vma unk40800c_vma;
		struct nouveau_gpuobj unk418810; / 0x419848 too */
		struct nouveau_vma unk418810_vma;

		struct nouveau_gpuobj *mmio;
		struct nouveau_vma mmio_vma;
		int mmio_nr;
		struct {
		struct nouveau_gpuobj *mem;
		struct nouveau_vma vma;
		} data[4];
		};

		static inline u32
		@@ -124,6 +125,7 @@ nv_mthd(struct drm_device *priv, u32 class, u32 mthd, u32 data)
		}

		struct nvc0_grctx {
		struct drm_device *dev;
		struct nvc0_graph_priv *priv;
		struct nvc0_graph_data *data;
		struct nvc0_graph_mmio *mmio;
		@@ -133,13 +135,14 @@ struct nvc0_grctx {
		u64 addr;
		};

		int nvc0_grctx_generate(struct nouveau_channel *);
		int nvc0_grctx_init(struct nvc0_graph_priv , struct nvc0_grctx );
		int nvc0_grctx_generate(struct drm_device *);
		int nvc0_grctx_init(struct drm_device , struct nvc0_graph_priv ,
		struct nvc0_grctx *);
		void nvc0_grctx_data(struct nvc0_grctx *, u32, u32, u32);
		void nvc0_grctx_mmio(struct nvc0_grctx *, u32, u32, u32, u32);
		int nvc0_grctx_fini(struct nvc0_grctx *);

		int nve0_grctx_generate(struct nouveau_channel *);
		int nve0_grctx_generate(struct drm_device *);

		#define mmio_data(s,a,p) nvc0_grctx_data(&info, (s), (a), (p))
		#define mmio_list(r,d,s,b) nvc0_grctx_mmio(&info, (r), (d), (s), (b))
		@@ -154,4 +157,9 @@ int nvc0_graph_context_ctor(struct nouveau_object , struct nouveau_object ,
		struct nouveau_object **);
		void nvc0_graph_context_dtor(struct nouveau_object *);

		void nvc0_graph_ctxctl_debug(struct drm_device *);

		int nvc0_graph_context_new(struct nouveau_channel *, int);
		void nvc0_graph_context_del(struct nouveau_channel *, int);

		#endif

drivers/gpu/drm/nouveau/core/engine/graph/nve0.c

+22 −243

Original line number	Diff line number	Diff line
		@@ -57,243 +57,6 @@ nve0_graph_ctxctl_debug(struct drm_device *dev)
		nve0_graph_ctxctl_debug_unit(dev, 0x502000 + (gpc * 0x8000));
		}

		/*
		 * Ask the context-switch firmware to load @chan's context, identified
		 * by the address of chan->ramin.
		 *
		 * Always returns 0: a timeout is logged but not reported to the caller.
		 */
		static int
		nve0_graph_load_context(struct nouveau_channel *chan)
		{
			struct drm_device *dev = chan->dev;

			nv_wr32(dev, 0x409840, 0x00000030);
			nv_wr32(dev, 0x409500, 0x80000000 | chan->ramin->addr >> 12);
			nv_wr32(dev, 0x409504, 0x00000003);
			if (!nv_wait(dev, 0x409800, 0x00000010, 0x00000010))
				NV_ERROR(dev, "PGRAPH: load_ctx timeout\n");

			return 0;
		}

		/*
		 * Ask the context-switch firmware to unload the current context to the
		 * channel whose instance address is @chan.
		 *
		 * Returns 0 on success or -EBUSY if the request times out.
		 */
		static int
		nve0_graph_unload_context_to(struct drm_device *dev, u64 chan)
		{
			nv_wr32(dev, 0x409840, 0x00000003);
			nv_wr32(dev, 0x409500, 0x80000000 | chan >> 12);
			nv_wr32(dev, 0x409504, 0x00000009);
			if (!nv_wait(dev, 0x409800, 0x00000001, 0x00000000)) {
				NV_ERROR(dev, "PGRAPH: unload_ctx timeout\n");
				return -EBUSY;
			}

			return 0;
		}

		static int
		nve0_graph_construct_context(struct nouveau_channel *chan)
		{
		struct nvc0_graph_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_GR);
		struct nvc0_graph_chan *grch = chan->engctx[NVOBJ_ENGINE_GR];
		struct drm_device *dev = chan->dev;
		int ret, i;
		u32 *ctx;

		ctx = kmalloc(priv->grctx_size, GFP_KERNEL);
		if (!ctx)
		return -ENOMEM;

		nve0_graph_load_context(chan);

		nv_wo32(grch->grctx, 0x1c, 1);
		nv_wo32(grch->grctx, 0x20, 0);
		nv_wo32(grch->grctx, 0x28, 0);
		nv_wo32(grch->grctx, 0x2c, 0);
		nvimem_flush(dev);

		ret = nve0_grctx_generate(chan);
		if (ret)
		goto err;

		ret = nve0_graph_unload_context_to(dev, chan->ramin->addr);
		if (ret)
		goto err;

		for (i = 0; i < priv->grctx_size; i += 4)
		ctx[i / 4] = nv_ro32(grch->grctx, i);

		priv->grctx_vals = ctx;
		return 0;

		err:
		kfree(ctx);
		return ret;
		}

		static int
		nve0_graph_create_context_mmio_list(struct nouveau_channel *chan)
		{
		struct nvc0_graph_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_GR);
		struct nvc0_graph_chan *grch = chan->engctx[NVOBJ_ENGINE_GR];
		struct drm_device *dev = chan->dev;
		u32 magic[GPC_MAX][2];
		u16 offset = 0x0000;
		int gpc;
		int ret;

		ret = nouveau_gpuobj_new(dev, NULL, 0x3000, 256, 0, &grch->unk408004);
		if (ret)
		return ret;

		ret = nouveau_gpuobj_map_vm(grch->unk408004, chan->vm,
		NV_MEM_ACCESS_RW \| NV_MEM_ACCESS_SYS,
		&grch->unk408004_vma);
		if (ret)
		return ret;

		ret = nouveau_gpuobj_new(dev, NULL, 0x8000, 256, 0, &grch->unk40800c);
		if (ret)
		return ret;

		ret = nouveau_gpuobj_map_vm(grch->unk40800c, chan->vm,
		NV_MEM_ACCESS_RW \| NV_MEM_ACCESS_SYS,
		&grch->unk40800c_vma);
		if (ret)
		return ret;

		ret = nouveau_gpuobj_new(dev, NULL, 384 * 1024, 4096, 0,
		&grch->unk418810);
		if (ret)
		return ret;

		ret = nouveau_gpuobj_map_vm(grch->unk418810, chan->vm,
		NV_MEM_ACCESS_RW, &grch->unk418810_vma);
		if (ret)
		return ret;

		ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 0, 0, &grch->mmio);
		if (ret)
		return ret;

		ret = nouveau_gpuobj_map_vm(grch->mmio, chan->vm,
		NV_MEM_ACCESS_RW \| NV_MEM_ACCESS_SYS,
		&grch->mmio_vma);
		if (ret)
		return ret;

		#define mmio(r,v) do { \
		nv_wo32(grch->mmio, (grch->mmio_nr * 8) + 0, (r)); \
		nv_wo32(grch->mmio, (grch->mmio_nr * 8) + 4, (v)); \
		grch->mmio_nr++; \
		} while (0)
		mmio(0x40800c, grch->unk40800c_vma.offset >> 8);
		mmio(0x408010, 0x80000000);
		mmio(0x419004, grch->unk40800c_vma.offset >> 8);
		mmio(0x419008, 0x00000000);
		mmio(0x4064cc, 0x80000000);
		mmio(0x408004, grch->unk408004_vma.offset >> 8);
		mmio(0x408008, 0x80000030);
		mmio(0x418808, grch->unk408004_vma.offset >> 8);
		mmio(0x41880c, 0x80000030);
		mmio(0x4064c8, 0x01800600);
		mmio(0x418810, 0x80000000 \| grch->unk418810_vma.offset >> 12);
		mmio(0x419848, 0x10000000 \| grch->unk418810_vma.offset >> 12);
		mmio(0x405830, 0x02180648);
		mmio(0x4064c4, 0x0192ffff);

		for (gpc = 0; gpc < priv->gpc_nr; gpc++) {
		u16 magic0 = 0x0218 * priv->tpc_nr[gpc];
		u16 magic1 = 0x0648 * priv->tpc_nr[gpc];
		magic[gpc][0] = 0x10000000 \| (magic0 << 16) \| offset;
		magic[gpc][1] = 0x00000000 \| (magic1 << 16);
		offset += 0x0324 * priv->tpc_nr[gpc];
		}

		for (gpc = 0; gpc < priv->gpc_nr; gpc++) {
		mmio(GPC_UNIT(gpc, 0x30c0), magic[gpc][0]);
		mmio(GPC_UNIT(gpc, 0x30e4), magic[gpc][1] \| offset);
		offset += 0x07ff * priv->tpc_nr[gpc];
		}

		mmio(0x17e91c, 0x06060609);
		mmio(0x17e920, 0x00090a05);
		#undef mmio
		return 0;
		}

		/*
		 * Create the graphics context for @chan: allocate and map the context
		 * image, build the channel's mmio list, point the channel at the
		 * context, and fill the image from the cached default values
		 * (generating them first if they are not cached yet).
		 *
		 * Returns 0 on success or a negative error.  Every failure after the
		 * state structure has been installed goes through the engine's
		 * context_del hook, so partially built state is torn down.
		 */
		static int
		nve0_graph_context_new(struct nouveau_channel *chan, int engine)
		{
			struct drm_device *dev = chan->dev;
			struct nvc0_graph_priv *priv = nv_engine(dev, engine);
			struct nvc0_graph_chan *grch;
			struct nouveau_gpuobj *grctx;
			int ret, i;

			grch = kzalloc(sizeof(*grch), GFP_KERNEL);
			if (!grch)
				return -ENOMEM;
			chan->engctx[NVOBJ_ENGINE_GR] = grch;

			ret = nouveau_gpuobj_new(dev, NULL, priv->grctx_size, 256, 0,
						 &grch->grctx);
			if (ret)
				goto error;

			ret = nouveau_gpuobj_map_vm(grch->grctx, chan->vm, NV_MEM_ACCESS_RW |
						    NV_MEM_ACCESS_SYS, &grch->grctx_vma);
			if (ret)
				goto error; /* was a bare return, which skipped the cleanup */

			grctx = grch->grctx;

			ret = nve0_graph_create_context_mmio_list(chan);
			if (ret)
				goto error;

			nv_wo32(chan->ramin, 0x0210, lower_32_bits(grch->grctx_vma.offset) | 4);
			nv_wo32(chan->ramin, 0x0214, upper_32_bits(grch->grctx_vma.offset));
			nvimem_flush(dev);

			/* default context values are generated once and then reused */
			if (!priv->grctx_vals) {
				ret = nve0_graph_construct_context(chan);
				if (ret)
					goto error;
			}

			for (i = 0; i < priv->grctx_size; i += 4)
				nv_wo32(grctx, i, priv->grctx_vals[i / 4]);
			nv_wo32(grctx, 0xf4, 0);
			nv_wo32(grctx, 0xf8, 0);
			nv_wo32(grctx, 0x10, grch->mmio_nr);
			nv_wo32(grctx, 0x14, lower_32_bits(grch->mmio_vma.offset));
			nv_wo32(grctx, 0x18, upper_32_bits(grch->mmio_vma.offset));
			nv_wo32(grctx, 0x1c, 1);
			nv_wo32(grctx, 0x20, 0);
			nv_wo32(grctx, 0x28, 0);
			nv_wo32(grctx, 0x2c, 0);

			nvimem_flush(dev);
			return 0;

		error:
			priv->base.context_del(chan, engine);
			return ret;
		}

		/*
		 * Tear down the graphics context of @chan: unmap every VM mapping, drop
		 * the references to the backing objects, free the per-channel state and
		 * clear the channel's engine context slot.
		 *
		 * Does nothing if the channel has no context for @engine.
		 */
		static void
		nve0_graph_context_del(struct nouveau_channel *chan, int engine)
		{
			struct nvc0_graph_chan *grch = chan->engctx[engine];

			if (!grch)
				return;

			nouveau_gpuobj_unmap(&grch->mmio_vma);
			nouveau_gpuobj_unmap(&grch->unk418810_vma);
			nouveau_gpuobj_unmap(&grch->unk40800c_vma);
			nouveau_gpuobj_unmap(&grch->unk408004_vma);
			nouveau_gpuobj_unmap(&grch->grctx_vma);
			nouveau_gpuobj_ref(NULL, &grch->mmio);
			nouveau_gpuobj_ref(NULL, &grch->unk418810);
			nouveau_gpuobj_ref(NULL, &grch->unk40800c);
			nouveau_gpuobj_ref(NULL, &grch->unk408004);
			nouveau_gpuobj_ref(NULL, &grch->grctx);

			/* the state structure itself was kzalloc'd at context creation */
			kfree(grch);
			chan->engctx[engine] = NULL;
		}

		static int
		nve0_graph_object_new(struct nouveau_channel *chan, int engine,
		u32 handle, u16 class)
		@@ -487,7 +250,7 @@ nve0_graph_init_ctxctl(struct drm_device *dev)
		NV_ERROR(dev, "fuc09 req 0x10 timeout\n");
		return -EBUSY;
		}
		priv->grctx_size = nv_rd32(dev, 0x409800);
		priv->size = nv_rd32(dev, 0x409800);

		nv_wr32(dev, 0x409840, 0xffffffff);
		nv_wr32(dev, 0x409500, 0x00000000);
		@@ -534,6 +297,17 @@ nve0_graph_init_ctxctl(struct drm_device *dev)
		nv_wr32(dev, 0x409614, 0x00000070);
		nv_wr32(dev, 0x409614, 0x00000770);
		nv_wr32(dev, 0x40802c, 0x00000001);

		if (priv->data == NULL) {
		int ret = nve0_grctx_generate(dev);
		if (ret) {
		NV_ERROR(dev, "PGRAPH: failed to construct context\n");
		return ret;
		}

		return 1;
		}

		return 0;
		}

		@@ -542,6 +316,7 @@ nve0_graph_init(struct drm_device *dev, int engine)
		{
		int ret;

		reset:
		nv_mask(dev, 0x000200, 0x18001000, 0x00000000);
		nv_mask(dev, 0x000200, 0x18001000, 0x18001000);

		@@ -566,8 +341,11 @@ nve0_graph_init(struct drm_device *dev, int engine)
		nv_wr32(dev, 0x400054, 0x34ce3464);

		ret = nve0_graph_init_ctxctl(dev);
		if (ret)
		if (ret) {
		if (ret == 1)
		goto reset;
		return ret;
		}

		return 0;
		}
		@@ -758,8 +536,8 @@ nve0_graph_destroy(struct drm_device *dev, int engine)
		nouveau_gpuobj_ref(NULL, &priv->unk4188b8);
		nouveau_gpuobj_ref(NULL, &priv->unk4188b4);

		if (priv->grctx_vals)
		kfree(priv->grctx_vals);
		if (priv->data)
		kfree(priv->data);

		NVOBJ_ENGINE_DEL(dev, GR);
		kfree(priv);
		@@ -786,8 +564,8 @@ nve0_graph_create(struct drm_device *dev)
		priv->base.destroy = nve0_graph_destroy;
		priv->base.init = nve0_graph_init;
		priv->base.fini = nve0_graph_fini;
		priv->base.context_new = nve0_graph_context_new;
		priv->base.context_del = nve0_graph_context_del;
		priv->base.context_new = nvc0_graph_context_new;
		priv->base.context_del = nvc0_graph_context_del;
		priv->base.object_new = nve0_graph_object_new;

		NVOBJ_ENGINE_ADD(dev, GR, &priv->base);
		@@ -801,6 +579,7 @@ nve0_graph_create(struct drm_device *dev)
		ret = 0;
		goto error;
		}
		priv->firmware = true;

		ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 256, 0, &priv->unk4188b4);
		if (ret)