Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 145b03eb authored by Tao Zhou, committed by Alex Deucher
Browse files

drm/amdgpu: create mmhub ras framework



Enable the MMHUB RAS feature and create sysfs/debugfs nodes for MMHUB.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 9fb2d8de
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -177,7 +177,8 @@ struct amdgpu_gmc {

	struct amdgpu_xgmi xgmi;
	struct amdgpu_irq_src	ecc_irq;
	struct ras_common_if    *ras_if;
	struct ras_common_if    *umc_ras_if;
	struct ras_common_if    *mmhub_ras_if;
};

/*
 * Dispatch a GPU TLB flush through the per-device GMC callback table:
 * calls adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, type).
 * Note that the adev argument is expanded twice, so it should be free of
 * side effects.
 */
#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type))
+80 −32
Original line number Diff line number Diff line
@@ -266,7 +266,7 @@ static int gmc_v9_0_process_ecc_irq(struct amdgpu_device *adev,
		struct amdgpu_irq_src *source,
		struct amdgpu_iv_entry *entry)
{
	struct ras_common_if *ras_if = adev->gmc.ras_if;
	struct ras_common_if *ras_if = adev->gmc.umc_ras_if;
	struct ras_dispatch_if ih_data = {
		.entry = entry,
	};
@@ -740,27 +740,25 @@ static int gmc_v9_0_allocate_vm_inv_eng(struct amdgpu_device *adev)
	return 0;
}

static int gmc_v9_0_ecc_late_init(void *handle)
static int gmc_v9_0_ecc_ras_block_late_init(void *handle,
			struct ras_fs_if *fs_info, struct ras_common_if *ras_block)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct ras_common_if **ras_if = &adev->gmc.ras_if;
	struct ras_common_if **ras_if = NULL;
	struct ras_ih_if ih_info = {
		.cb = gmc_v9_0_process_ras_data_cb,
	};
	struct ras_fs_if fs_info = {
		.sysfs_name = "umc_err_count",
		.debugfs_name = "umc_err_inject",
	};
	struct ras_common_if ras_block = {
		.block = AMDGPU_RAS_BLOCK__UMC,
		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
		.sub_block_index = 0,
		.name = "umc",
	};
	int r;

	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) {
		amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
	if (ras_block->block == AMDGPU_RAS_BLOCK__UMC)
		ras_if = &adev->gmc.umc_ras_if;
	else if (ras_block->block == AMDGPU_RAS_BLOCK__MMHUB)
		ras_if = &adev->gmc.mmhub_ras_if;
	else
		BUG();

	if (!amdgpu_ras_is_supported(adev, ras_block->block)) {
		amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
		return 0;
	}

@@ -775,7 +773,7 @@ static int gmc_v9_0_ecc_late_init(void *handle)
			if (r == -EAGAIN) {
				/* request a gpu reset. will run again. */
				amdgpu_ras_request_reset_on_boot(adev,
						AMDGPU_RAS_BLOCK__UMC);
						ras_block->block);
				return 0;
			}
			/* fail to enable ras, cleanup all. */
@@ -789,40 +787,45 @@ static int gmc_v9_0_ecc_late_init(void *handle)
	if (!*ras_if)
		return -ENOMEM;

	**ras_if = ras_block;
	**ras_if = *ras_block;

	r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
	if (r) {
		if (r == -EAGAIN) {
			amdgpu_ras_request_reset_on_boot(adev,
					AMDGPU_RAS_BLOCK__UMC);
					ras_block->block);
			r = 0;
		}
		goto feature;
	}

	ih_info.head = **ras_if;
	fs_info.head = **ras_if;
	fs_info->head = **ras_if;

	if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) {
		r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
		if (r)
			goto interrupt;
	}

	amdgpu_ras_debugfs_create(adev, &fs_info);
	amdgpu_ras_debugfs_create(adev, fs_info);

	r = amdgpu_ras_sysfs_create(adev, &fs_info);
	r = amdgpu_ras_sysfs_create(adev, fs_info);
	if (r)
		goto sysfs;
resume:
	if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) {
		r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0);
		if (r)
			goto irq;
	}

	return 0;
irq:
	amdgpu_ras_sysfs_remove(adev, *ras_if);
sysfs:
	amdgpu_ras_debugfs_remove(adev, *ras_if);
	if (ras_block->block == AMDGPU_RAS_BLOCK__UMC)
		amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
interrupt:
	amdgpu_ras_feature_enable(adev, *ras_if, 0);
@@ -832,6 +835,40 @@ static int gmc_v9_0_ecc_late_init(void *handle)
	return r;
}

static int gmc_v9_0_ecc_late_init(void *handle)
{
	int r;

	struct ras_fs_if umc_fs_info = {
		.sysfs_name = "umc_err_count",
		.debugfs_name = "umc_err_inject",
	};
	struct ras_common_if umc_ras_block = {
		.block = AMDGPU_RAS_BLOCK__UMC,
		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
		.sub_block_index = 0,
		.name = "umc",
	};
	struct ras_fs_if mmhub_fs_info = {
		.sysfs_name = "mmhub_err_count",
		.debugfs_name = "mmhub_err_inject",
	};
	struct ras_common_if mmhub_ras_block = {
		.block = AMDGPU_RAS_BLOCK__MMHUB,
		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
		.sub_block_index = 0,
		.name = "mmhub",
	};

	r = gmc_v9_0_ecc_ras_block_late_init(handle,
			&umc_fs_info, &umc_ras_block);
	if (r)
		return r;

	r = gmc_v9_0_ecc_ras_block_late_init(handle,
			&mmhub_fs_info, &mmhub_ras_block);
	return r;
}

static int gmc_v9_0_late_init(void *handle)
{
@@ -1192,8 +1229,8 @@ static int gmc_v9_0_sw_fini(void *handle)
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) &&
			adev->gmc.ras_if) {
		struct ras_common_if *ras_if = adev->gmc.ras_if;
			adev->gmc.umc_ras_if) {
		struct ras_common_if *ras_if = adev->gmc.umc_ras_if;
		struct ras_ih_if ih_info = {
			.head = *ras_if,
		};
@@ -1207,6 +1244,17 @@ static int gmc_v9_0_sw_fini(void *handle)
		kfree(ras_if);
	}

	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) &&
			adev->gmc.mmhub_ras_if) {
		struct ras_common_if *ras_if = adev->gmc.mmhub_ras_if;

		/* remove fs and disable ras feature */
		amdgpu_ras_debugfs_remove(adev, ras_if);
		amdgpu_ras_sysfs_remove(adev, ras_if);
		amdgpu_ras_feature_enable(adev, ras_if, 0);
		kfree(ras_if);
	}

	amdgpu_gem_force_release(adev);
	amdgpu_vm_manager_fini(adev);