Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit a9742b79 authored by Dave Airlie's avatar Dave Airlie
Browse files

Merge tag 'drm-amdkfd-next-2017-12-24' of git://people.freedesktop.org/~gabbayo/linux into drm-next

- Add CWSR (compute wave save restore) support for GFX8 (Carrizo)
- Fix SDMA user-mode queues support for GFX7 (Kaveri)
- Add SDMA user-mode queues support for GFX8 (Carrizo)
- Allow HWS (hardware scheduling) to schedule multiple processes concurrently
- Add debugfs support
- Simplify process locking and lock dependencies
- Refactoring topology code to prepare for dGPU support + fixes to that code
  - Add option to generate dummy/virtual CRAT table when its missing or deformed
  - Recognize CPUs other then APUs as compute entities
- Various clean ups and bug fixes

I have not yet sent the dGPU topology code because it depends on a patch
for the PCI subsystem that adds PCIe atomics support. Once that patch is
upstreamed we can continue with the rest of the dGPU code.

* tag 'drm-amdkfd-next-2017-12-24' of git://people.freedesktop.org/~gabbayo/linux: (53 commits)
  drm/amdgpu: Add support for reporting VRAM usage
  drm/amdkfd: Ignore ACPI CRAT for non-APU systems
  drm/amdkfd: Module option to disable CRAT table
  drm/amdkfd: Add AQL Queue Memory flag on topology
  drm/amdkfd: Fixup incorrect info in the CZ CRAT table
  drm/amdkfd: Add perf counters to topology
  drm/amdkfd: Add topology support for dGPUs
  drm/amdkfd: Add topology support for CPUs
  drm/amdkfd: Fix sibling_map[] size
  drm/amdkfd: Simplify counting of memory banks
  drm/amdkfd: Turn verbose topology messages into pr_debug
  drm/amdkfd: sync IOLINK defines to thunk spec
  drm/amdkfd: Support enumerating non-GPU devices
  drm/amdkfd: Decouple CRAT parsing from device list update
  drm/amdkfd: Reorganize CRAT fetching from ACPI
  drm/amdkfd: Group up CRAT related functions
  drm/amdkfd: Fix memory leaks in kfd topology
  drm/amdkfd: Topology: Fix location_id
  drm/amdkfd: Update number of compute unit from KGD
  drm/amd: Remove get_vmem_size from KGD-KFD interface
  ...
parents 35087762 9f0a0b41
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -959,6 +959,7 @@ struct amdgpu_gfx_config {
};

struct amdgpu_cu_info {
	uint32_t simd_per_cu;
	uint32_t max_waves_per_simd;
	uint32_t wave_front_size;
	uint32_t max_scratch_slots_per_cu;
+60 −7
Original line number Diff line number Diff line
@@ -275,14 +275,34 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
	kfree(mem);
}

uint64_t get_vmem_size(struct kgd_dev *kgd)
void get_local_mem_info(struct kgd_dev *kgd,
			struct kfd_local_mem_info *mem_info)
{
	struct amdgpu_device *adev =
		(struct amdgpu_device *)kgd;
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask :
					     ~((1ULL << 32) - 1);
	resource_size_t aper_limit = adev->mc.aper_base + adev->mc.aper_size;

	memset(mem_info, 0, sizeof(*mem_info));
	if (!(adev->mc.aper_base & address_mask || aper_limit & address_mask)) {
		mem_info->local_mem_size_public = adev->mc.visible_vram_size;
		mem_info->local_mem_size_private = adev->mc.real_vram_size -
				adev->mc.visible_vram_size;
	} else {
		mem_info->local_mem_size_public = 0;
		mem_info->local_mem_size_private = adev->mc.real_vram_size;
	}
	mem_info->vram_width = adev->mc.vram_width;

	BUG_ON(kgd == NULL);
	pr_debug("Address base: 0x%llx limit 0x%llx public 0x%llx private 0x%llx\n",
			adev->mc.aper_base, aper_limit,
			mem_info->local_mem_size_public,
			mem_info->local_mem_size_private);

	return adev->mc.real_vram_size;
	if (amdgpu_sriov_vf(adev))
		mem_info->mem_clk_max = adev->clock.default_mclk / 100;
	else
		mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
}

uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
@@ -298,6 +318,39 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	/* The sclk is in quantas of 10kHz */
	return adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
	/* the sclk is in quantas of 10kHz */
	if (amdgpu_sriov_vf(adev))
		return adev->clock.default_sclk / 100;

	return amdgpu_dpm_get_sclk(adev, false) / 100;
}

void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
	struct amdgpu_cu_info acu_info = adev->gfx.cu_info;

	memset(cu_info, 0, sizeof(*cu_info));
	if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap))
		return;

	cu_info->cu_active_number = acu_info.number;
	cu_info->cu_ao_mask = acu_info.ao_cu_mask;
	memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
	       sizeof(acu_info.bitmap));
	cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
	cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
	cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
	cu_info->simd_per_cu = acu_info.simd_per_cu;
	cu_info->max_waves_per_simd = acu_info.max_waves_per_simd;
	cu_info->wave_front_size = acu_info.wave_front_size;
	cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu;
	cu_info->lds_size = acu_info.lds_size;
}

uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;

	return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
}
+4 −1
Original line number Diff line number Diff line
@@ -56,10 +56,13 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
			void **mem_obj, uint64_t *gpu_addr,
			void **cpu_ptr);
void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
uint64_t get_vmem_size(struct kgd_dev *kgd);
void get_local_mem_info(struct kgd_dev *kgd,
			struct kfd_local_mem_info *mem_info);
uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);

uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);

#define read_user_wptr(mmptr, wptr, dst)				\
	({								\
+98 −13
Original line number Diff line number Diff line
@@ -105,7 +105,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
static int kgd_hqd_dump(struct kgd_dev *kgd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm);
static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs);
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
				uint32_t pipe_id, uint32_t queue_id);

@@ -166,7 +173,7 @@ static int get_tile_config(struct kgd_dev *kgd,
static const struct kfd2kgd_calls kfd2kgd = {
	.init_gtt_mem_allocation = alloc_gtt_mem,
	.free_gtt_mem = free_gtt_mem,
	.get_vmem_size = get_vmem_size,
	.get_local_mem_info = get_local_mem_info,
	.get_gpu_clock_counter = get_gpu_clock_counter,
	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
	.alloc_pasid = amdgpu_vm_alloc_pasid,
@@ -177,6 +184,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
	.init_interrupts = kgd_init_interrupts,
	.hqd_load = kgd_hqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_dump = kgd_hqd_dump,
	.hqd_sdma_dump = kgd_hqd_sdma_dump,
	.hqd_is_occupied = kgd_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_hqd_destroy,
@@ -191,6 +200,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
	.get_fw_version = get_fw_version,
	.set_scratch_backing_va = set_scratch_backing_va,
	.get_tile_config = get_tile_config,
	.get_cu_info = get_cu_info,
	.get_vram_usage = amdgpu_amdkfd_get_vram_usage
};

struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
@@ -375,7 +386,44 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
	return 0;
}

static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
static int kgd_hqd_dump(struct kgd_dev *kgd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t i = 0, reg;
#define HQD_N_REGS (35+4)
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)

	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	acquire_queue(kgd, pipe_id, queue_id);

	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);

	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
		DUMP_REG(reg);

	release_queue(kgd);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct cik_sdma_rlc_registers *m;
@@ -410,10 +458,17 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
		WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
	}

	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL,
				m->sdma_rlc_doorbell);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
	data = REG_SET_FIELD(m->sdma_rlc_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdma_rlc_rb_rptr);

	if (read_user_wptr(mm, wptr, data))
		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
	else
		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
		       m->sdma_rlc_rb_rptr);

	WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
				m->sdma_rlc_virtual_addr);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base);
@@ -423,8 +478,37 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
			m->sdma_rlc_rb_rptr_addr_lo);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
			m->sdma_rlc_rb_rptr_addr_hi);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
			m->sdma_rlc_rb_cntl);

	data = REG_SET_FIELD(m->sdma_rlc_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}

static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
		queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+4)

	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_offset + reg);
	for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
	     reg++)
		DUMP_REG(sdma_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}
@@ -575,7 +659,7 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
	struct cik_sdma_rlc_registers *m;
	uint32_t sdma_base_addr;
	uint32_t temp;
	int timeout = utimeout;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(m);
@@ -588,10 +672,9 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT)
			break;
		if (timeout <= 0)
		if (time_after(jiffies, end_jiffies))
			return -ETIME;
		msleep(20);
		timeout -= 20;
		usleep_range(500, 1000);
	}

	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
@@ -599,6 +682,8 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	m->sdma_rlc_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);

	return 0;
}

+166 −18
Original line number Diff line number Diff line
@@ -45,7 +45,7 @@ enum hqd_dequeue_request_type {
	RESET_WAVES
};

struct cik_sdma_rlc_registers;
struct vi_sdma_mqd;

/*
 * Register access functions
@@ -64,7 +64,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
			uint32_t queue_id, uint32_t __user *wptr,
			uint32_t wptr_shift, uint32_t wptr_mask,
			struct mm_struct *mm);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
static int kgd_hqd_dump(struct kgd_dev *kgd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm);
static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs);
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
		uint32_t pipe_id, uint32_t queue_id);
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
@@ -125,7 +132,7 @@ static int get_tile_config(struct kgd_dev *kgd,
static const struct kfd2kgd_calls kfd2kgd = {
	.init_gtt_mem_allocation = alloc_gtt_mem,
	.free_gtt_mem = free_gtt_mem,
	.get_vmem_size = get_vmem_size,
	.get_local_mem_info = get_local_mem_info,
	.get_gpu_clock_counter = get_gpu_clock_counter,
	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
	.alloc_pasid = amdgpu_vm_alloc_pasid,
@@ -136,6 +143,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
	.init_interrupts = kgd_init_interrupts,
	.hqd_load = kgd_hqd_load,
	.hqd_sdma_load = kgd_hqd_sdma_load,
	.hqd_dump = kgd_hqd_dump,
	.hqd_sdma_dump = kgd_hqd_sdma_dump,
	.hqd_is_occupied = kgd_hqd_is_occupied,
	.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
	.hqd_destroy = kgd_hqd_destroy,
@@ -152,6 +161,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
	.get_fw_version = get_fw_version,
	.set_scratch_backing_va = set_scratch_backing_va,
	.get_tile_config = get_tile_config,
	.get_cu_info = get_cu_info,
	.get_vram_usage = amdgpu_amdkfd_get_vram_usage
};

struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
@@ -268,9 +279,15 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
	return 0;
}

static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m)
{
	return 0;
	uint32_t retval;

	retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
		m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
	pr_debug("kfd: sdma base address: 0x%x\n", retval);

	return retval;
}

static inline struct vi_mqd *get_mqd(void *mqd)
@@ -278,9 +295,9 @@ static inline struct vi_mqd *get_mqd(void *mqd)
	return (struct vi_mqd *)mqd;
}

static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
{
	return (struct cik_sdma_rlc_registers *)mqd;
	return (struct vi_sdma_mqd *)mqd;
}

static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
@@ -358,8 +375,138 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
	return 0;
}

static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
static int kgd_hqd_dump(struct kgd_dev *kgd,
			uint32_t pipe_id, uint32_t queue_id,
			uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t i = 0, reg;
#define HQD_N_REGS (54+4)
#define DUMP_REG(addr) do {				\
		if (WARN_ON_ONCE(i >= HQD_N_REGS))	\
			break;				\
		(*dump)[i][0] = (addr) << 2;		\
		(*dump)[i++][1] = RREG32(addr);		\
	} while (0)

	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	acquire_queue(kgd, pipe_id, queue_id);

	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
	DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);

	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_DONES; reg++)
		DUMP_REG(reg);

	release_queue(kgd);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
			     uint32_t __user *wptr, struct mm_struct *mm)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct vi_sdma_mqd *m;
	unsigned long end_jiffies;
	uint32_t sdma_base_addr;
	uint32_t data;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(m);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
		m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));

	end_jiffies = msecs_to_jiffies(2000) + jiffies;
	while (true) {
		data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (time_after(jiffies, end_jiffies))
			return -ETIME;
		usleep_range(500, 1000);
	}
	if (m->sdma_engine_id) {
		data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL);
		data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL,
				RESUME_CTX, 0);
		WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data);
	} else {
		data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL);
		data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
				RESUME_CTX, 0);
		WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
	}

	data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
			     ENABLE, 1);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);

	if (read_user_wptr(mm, wptr, data))
		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
	else
		WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
		       m->sdmax_rlcx_rb_rptr);

	WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
				m->sdmax_rlcx_virtual_addr);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
			m->sdmax_rlcx_rb_base_hi);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
			m->sdmax_rlcx_rb_rptr_addr_lo);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
			m->sdmax_rlcx_rb_rptr_addr_hi);

	data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
			     RB_ENABLE, 1);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);

	return 0;
}

static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
			     uint32_t engine_id, uint32_t queue_id,
			     uint32_t (**dump)[2], uint32_t *n_regs)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
		queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
	uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+4+2+3+7)

	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
	if (*dump == NULL)
		return -ENOMEM;

	for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
		DUMP_REG(sdma_offset + reg);
	for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
	     reg++)
		DUMP_REG(sdma_offset + reg);
	for (reg = mmSDMA0_RLC0_CSA_ADDR_LO; reg <= mmSDMA0_RLC0_CSA_ADDR_HI;
	     reg++)
		DUMP_REG(sdma_offset + reg);
	for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_DUMMY_REG;
	     reg++)
		DUMP_REG(sdma_offset + reg);
	for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL;
	     reg++)
		DUMP_REG(sdma_offset + reg);

	WARN_ON_ONCE(i != HQD_N_REGS);
	*n_regs = i;

	return 0;
}

@@ -388,7 +535,7 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct cik_sdma_rlc_registers *m;
	struct vi_sdma_mqd *m;
	uint32_t sdma_base_addr;
	uint32_t sdma_rlc_rb_cntl;

@@ -509,10 +656,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
				unsigned int utimeout)
{
	struct amdgpu_device *adev = get_amdgpu_device(kgd);
	struct cik_sdma_rlc_registers *m;
	struct vi_sdma_mqd *m;
	uint32_t sdma_base_addr;
	uint32_t temp;
	int timeout = utimeout;
	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;

	m = get_sdma_mqd(mqd);
	sdma_base_addr = get_sdma_base_addr(m);
@@ -523,18 +670,19 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,

	while (true) {
		temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
		if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT)
		if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
			break;
		if (timeout <= 0)
		if (time_after(jiffies, end_jiffies))
			return -ETIME;
		msleep(20);
		timeout -= 20;
		usleep_range(500, 1000);
	}

	WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0);
	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
		RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
		SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);

	m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);

	return 0;
}
Loading