diff options
author | Felix Kuehling <Felix.Kuehling@amd.com> | 2017-09-21 16:26:41 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2017-09-28 16:03:30 -0400 |
commit | c98171ccf6580407d07a3b5dc8188ce9e1f4f7ca (patch) | |
tree | 191e13e83451ea87c6e4fdc758425b046a0c4bf8 /drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | |
parent | 1bab0fc01b84c1aa8a65a1f1de885e1faab48264 (diff) | |
download | lwn-c98171ccf6580407d07a3b5dc8188ce9e1f4f7ca.tar.gz lwn-c98171ccf6580407d07a3b5dc8188ce9e1f4f7ca.zip |
drm/amdgpu: Handle GPUVM fault storms
When many wavefronts cause VM faults at the same time, it can
overwhelm the interrupt handler and cause IH ring overflows before
the driver can notify or kill the faulting application.
As a workaround, I'm introducing a limited per-VM fault credit. After
that number of VM faults has occurred, further VM faults are
filtered out at the prescreen stage of processing.
This depends on the PASID in the interrupt packet, so it currently
only works for KFD contexts.
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 7 |
1 files changed, 6 insertions, 1 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 447ed6e7e586..66efbc2e43af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -165,8 +165,11 @@ struct amdgpu_vm {
 	/* Flag to indicate ATS support from PTE for GFX9 */
 	bool pte_support_ats;
 
-	/* Up to 128 pending page faults */
+	/* Up to 128 pending retry page faults */
 	DECLARE_KFIFO(faults, u64, 128);
+
+	/* Limit non-retry fault storms */
+	unsigned int fault_credit;
 };
 
 struct amdgpu_vm_id {
@@ -244,6 +247,8 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		   int vm_context, unsigned int pasid);
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
+				  unsigned int pasid);
 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
 			 struct amdgpu_bo_list_entry *entry);