summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
diff options
context:
space:
mode:
authorChong Li <chongli2@amd.com>2023-04-14 13:51:19 +0800
committerAlex Deucher <alexander.deucher@amd.com>2023-04-18 16:28:50 -0400
commit38eecbe086a4e52f54b2bbda8feba65d44addbef (patch)
tree4a047e2f7f903a6faa718fcfb1735f0faee75898 /drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
parent8d9cdb4674f6e4e7fc789f8184a58c73eeadc16c (diff)
downloadlwn-38eecbe086a4e52f54b2bbda8feba65d44addbef.tar.gz
lwn-38eecbe086a4e52f54b2bbda8feba65d44addbef.zip
drm/amdgpu: release gpu full access after "amdgpu_device_ip_late_init"
[WHY] Function "amdgpu_irq_update()" called by "amdgpu_device_ip_late_init()" is an atomic context. We shouldn't access registers through KIQ since "msleep()" may be called in "amdgpu_kiq_rreg()". [HOW] Move function "amdgpu_virt_release_full_gpu()" after function "amdgpu_device_ip_late_init()", to ensure that registers be accessed through RLCG instead of KIQ. Call Trace: <TASK> show_stack+0x52/0x69 dump_stack_lvl+0x49/0x6d dump_stack+0x10/0x18 __schedule_bug.cold+0x4f/0x6b __schedule+0x473/0x5d0 ? __wake_up_klogd.part.0+0x40/0x70 ? vprintk_emit+0xbe/0x1f0 schedule+0x68/0x110 schedule_timeout+0x87/0x160 ? timer_migration_handler+0xa0/0xa0 msleep+0x2d/0x50 amdgpu_kiq_rreg+0x18d/0x1f0 [amdgpu] amdgpu_device_rreg.part.0+0x59/0xd0 [amdgpu] amdgpu_device_rreg+0x3a/0x50 [amdgpu] amdgpu_sriov_rreg+0x3c/0xb0 [amdgpu] gfx_v10_0_set_gfx_eop_interrupt_state.constprop.0+0x16c/0x190 [amdgpu] gfx_v10_0_set_eop_interrupt_state+0xa5/0xb0 [amdgpu] amdgpu_irq_update+0x53/0x80 [amdgpu] amdgpu_irq_get+0x7c/0xb0 [amdgpu] amdgpu_fence_driver_hw_init+0x58/0x90 [amdgpu] amdgpu_device_init.cold+0x16b7/0x2022 [amdgpu] Signed-off-by: Chong Li <chongli2@amd.com> Reviewed-by: JingWen.Chen2@amd.com Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c32
1 files changed, 17 insertions, 15 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a2292acf06d0..9b1eaba85bbd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2539,8 +2539,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
amdgpu_fru_get_product_info(adev);
init_failed:
- if (amdgpu_sriov_vf(adev))
- amdgpu_virt_release_full_gpu(adev, true);
return r;
}
@@ -3859,18 +3857,6 @@ fence_driver_init:
r = amdgpu_device_ip_init(adev);
if (r) {
- /* failed in exclusive mode due to timeout */
- if (amdgpu_sriov_vf(adev) &&
- !amdgpu_sriov_runtime(adev) &&
- amdgpu_virt_mmio_blocked(adev) &&
- !amdgpu_virt_wait_reset(adev)) {
- dev_err(adev->dev, "VF exclusive mode timeout\n");
- /* Don't send request since VF is inactive. */
- adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
- adev->virt.ops = NULL;
- r = -EAGAIN;
- goto release_ras_con;
- }
dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
goto release_ras_con;
@@ -3939,8 +3925,10 @@ fence_driver_init:
msecs_to_jiffies(AMDGPU_RESUME_MS));
}
- if (amdgpu_sriov_vf(adev))
+ if (amdgpu_sriov_vf(adev)) {
+ amdgpu_virt_release_full_gpu(adev, true);
flush_delayed_work(&adev->delayed_init_work);
+ }
r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
if (r)
@@ -3980,6 +3968,20 @@ fence_driver_init:
return 0;
release_ras_con:
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_release_full_gpu(adev, true);
+
+ /* failed in exclusive mode due to timeout */
+ if (amdgpu_sriov_vf(adev) &&
+ !amdgpu_sriov_runtime(adev) &&
+ amdgpu_virt_mmio_blocked(adev) &&
+ !amdgpu_virt_wait_reset(adev)) {
+ dev_err(adev->dev, "VF exclusive mode timeout\n");
+ /* Don't send request since VF is inactive. */
+ adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
+ adev->virt.ops = NULL;
+ r = -EAGAIN;
+ }
amdgpu_release_ras_context(adev);
failed: