summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
diff options
context:
space:
mode:
authorFelix Kuehling <Felix.Kuehling@amd.com>2017-09-21 16:26:41 -0400
committerAlex Deucher <alexander.deucher@amd.com>2017-09-28 16:03:30 -0400
commitc98171ccf6580407d07a3b5dc8188ce9e1f4f7ca (patch)
tree191e13e83451ea87c6e4fdc758425b046a0c4bf8 /drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
parent1bab0fc01b84c1aa8a65a1f1de885e1faab48264 (diff)
downloadlwn-c98171ccf6580407d07a3b5dc8188ce9e1f4f7ca.tar.gz
lwn-c98171ccf6580407d07a3b5dc8188ce9e1f4f7ca.zip
drm/amdgpu: Handle GPUVM fault storms
When many wavefronts cause VM faults at the same time, it can overwhelm the interrupt handler and cause IH ring overflows before the driver can notify or kill the faulting application. As a workaround I'm introducing limited per-VM fault credit. After that number of VM faults have occurred, further VM faults are filtered out at the prescreen stage of processing. This depends on the PASID in the interrupt packet, so it currently only works for KFD contexts. Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h7
1 files changed, 6 insertions, 1 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 447ed6e7e586..66efbc2e43af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -165,8 +165,11 @@ struct amdgpu_vm {
/* Flag to indicate ATS support from PTE for GFX9 */
bool pte_support_ats;
- /* Up to 128 pending page faults */
+ /* Up to 128 pending retry page faults */
DECLARE_KFIFO(faults, u64, 128);
+
+ /* Limit non-retry fault storms */
+ unsigned int fault_credit;
};
struct amdgpu_vm_id {
@@ -244,6 +247,8 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int vm_context, unsigned int pasid);
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
+ unsigned int pasid);
void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
struct list_head *validated,
struct amdgpu_bo_list_entry *entry);