Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 8840a387 authored by pding, committed by Alex Deucher
Browse files

drm/amdgpu: retry init if it fails due to exclusive mode timeout (v3)



Exclusive mode has a real-time limit in practice, e.g. it must be completed
within 300ms. The timeout is easily observed when running many VFs/VMs on a
single host under heavy CPU workload.

If we find the init fails due to exclusive mode timeout, try it again.

v2:
 - rewrite the condition for readability.

v3:
 - fix typo, add comments for sleep

Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: pding <Pixel.Ding@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent b5914238
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -2303,6 +2303,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,

	r = amdgpu_init(adev);
	if (r) {
		/* failed in exclusive mode due to timeout */
		if (amdgpu_sriov_vf(adev) &&
		    !amdgpu_sriov_runtime(adev) &&
		    amdgpu_virt_mmio_blocked(adev) &&
		    !amdgpu_virt_wait_reset(adev)) {
			dev_err(adev->dev, "VF exclusive mode timeout\n");
			r = -EAGAIN;
			goto failed;
		}
		dev_err(adev->dev, "amdgpu_init failed\n");
		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
		amdgpu_fini(adev);
@@ -2390,6 +2399,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
	amdgpu_vf_error_trans_all(adev);
	if (runtime)
		vga_switcheroo_fini_domain_pm_ops(adev->dev);

	return r;
}

+13 −2
Original line number Diff line number Diff line
@@ -86,7 +86,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
{
	struct amdgpu_device *adev;
	int r, acpi_status;
	int r, acpi_status, retry = 0;

#ifdef CONFIG_DRM_AMDGPU_SI
	if (!amdgpu_si_support) {
@@ -122,6 +122,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
		}
	}
#endif
retry_init:

	adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL);
	if (adev == NULL) {
@@ -144,7 +145,17 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
	 * VRAM allocation
	 */
	r = amdgpu_device_init(adev, dev, dev->pdev, flags);
	if (r) {
	if (r == -EAGAIN && ++retry <= 3) {
		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
		adev->virt.ops = NULL;
		amdgpu_device_fini(adev);
		kfree(adev);
		dev->dev_private = NULL;
		/* Don't request EX mode too frequently which is attacking */
		msleep(5000);
		dev_err(&dev->pdev->dev, "retry init %d\n", retry);
		goto retry_init;
	} else if (r) {
		dev_err(&dev->pdev->dev, "Fatal error during GPU init\n");
		goto out;
	}