drm/amdgpu: implement retry fault handling for Navi

Same as gmc9, basically filter the fault, reroute or handle it.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
author: Christian König <christian.koenig@amd.com> 2020-11-03 14:40:57 +0100
committer: Alex Deucher <alexander.deucher@amd.com> 2020-11-24 12:07:01 -0500
commit: a2a8857ceefe0a5011624779b9edec4412df6b6e (patch)
tree: a9c575550edadb4e3989cd3f8b91ff3884efb80a
parent: d4581f7dfbe1f115b9f72f80a7f1febb52940bbd (diff)
download: lwn-a2a8857ceefe0a5011624779b9edec4412df6b6e.tar.gz
lwn-a2a8857ceefe0a5011624779b9edec4412df6b6e.zip
1 files changed, 25 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index cffc3ca8fcde..4f6e44e21691 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -94,6 +94,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
 				       struct amdgpu_irq_src *source,
 				       struct amdgpu_iv_entry *entry)
 {
+	bool retry_fault = !!(entry->src_data[1] & 0x80);
 	struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src];
 	struct amdgpu_task_info task_info;
 	uint32_t status = 0;
@@ -102,6 +103,30 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
 	addr = (u64)entry->src_data[0] << 12;
 	addr |= ((u64)entry->src_data[1] & 0xf) << 44;
 
+	if (retry_fault) {
+		/* Returning 1 here also prevents sending the IV to the KFD */
+
+		/* Process it only if it's the first fault for this address */
+		if (entry->ih != &adev->irq.ih_soft &&
+		    amdgpu_gmc_filter_faults(adev, addr, entry->pasid,
+					     entry->timestamp))
+			return 1;
+
+		/* Delegate it to a different ring if the hardware hasn't
+		 * already done it.
+		 */
+		if (in_interrupt()) {
+			amdgpu_irq_delegate(adev, entry, 8);
+			return 1;
+		}
+
+		/* Try to handle the recoverable page faults by filling page
+		 * tables
+		 */
+		if (amdgpu_vm_handle_fault(adev, entry->pasid, addr))
+			return 1;
+	}
+
 	if (!amdgpu_sriov_vf(adev)) {
 		/*
 		 * Issue a dummy read to wait for the status register to
author	Christian König <christian.koenig@amd.com>	2020-11-03 14:40:57 +0100
committer	Alex Deucher <alexander.deucher@amd.com>	2020-11-24 12:07:01 -0500
commit	a2a8857ceefe0a5011624779b9edec4412df6b6e (patch)
tree	a9c575550edadb4e3989cd3f8b91ff3884efb80a
parent	d4581f7dfbe1f115b9f72f80a7f1febb52940bbd (diff)
download	lwn-a2a8857ceefe0a5011624779b9edec4412df6b6e.tar.gz lwn-a2a8857ceefe0a5011624779b9edec4412df6b6e.zip