diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 84 |
1 files changed, 47 insertions, 37 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 3e2911895c74..100717a98ec1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -563,7 +563,8 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange, int r; p = container_of(prange->svms, struct kfd_process, svms); - pr_debug("pasid: %x svms 0x%p [0x%lx 0x%lx]\n", p->pasid, prange->svms, + pr_debug("process pid: %d svms 0x%p [0x%lx 0x%lx]\n", + p->lead_thread->pid, prange->svms, prange->start, prange->last); if (svm_range_validate_svm_bo(node, prange)) @@ -1195,6 +1196,7 @@ svm_range_get_pte_flags(struct kfd_node *node, struct kfd_node *bo_node; uint32_t flags = prange->flags; uint32_t mapping_flags = 0; + uint32_t gc_ip_version = KFD_GC_VERSION(node); uint64_t pte_flags; bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN); bool coherent = flags & (KFD_IOCTL_SVM_FLAG_COHERENT | KFD_IOCTL_SVM_FLAG_EXT_COHERENT); @@ -1204,7 +1206,7 @@ svm_range_get_pte_flags(struct kfd_node *node, if (domain == SVM_RANGE_VRAM_DOMAIN) bo_node = prange->svm_bo->node; - switch (amdgpu_ip_version(node->adev, GC_HWIP, 0)) { + switch (gc_ip_version) { case IP_VERSION(9, 4, 1): if (domain == SVM_RANGE_VRAM_DOMAIN) { if (bo_node == node) { @@ -1241,8 +1243,10 @@ svm_range_get_pte_flags(struct kfd_node *node, break; case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): if (ext_coherent) - mtype_local = node->adev->rev_id ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_UC; + mtype_local = (gc_ip_version < IP_VERSION(9, 5, 0) && !node->adev->rev_id) ? + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_CC; else mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC : amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; @@ -1257,9 +1261,13 @@ svm_range_get_pte_flags(struct kfd_node *node, */ else if (svm_nodes_in_same_hive(bo_node, node) && !ext_coherent) mapping_flags |= AMDGPU_VM_MTYPE_NC; - /* PCIe P2P or extended system scope coherence */ - else + /* PCIe P2P on GPUs pre-9.5.0 */ + else if (gc_ip_version < IP_VERSION(9, 5, 0) && + !svm_nodes_in_same_hive(bo_node, node)) mapping_flags |= AMDGPU_VM_MTYPE_UC; + /* Other remote memory */ + else + mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; /* system memory accessed by the APU */ } else if (node->adev->flags & AMD_IS_APU) { /* On NUMA systems, locality is determined per-page @@ -1271,18 +1279,15 @@ svm_range_get_pte_flags(struct kfd_node *node, mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; /* system memory accessed by the dGPU */ } else { - mapping_flags |= AMDGPU_VM_MTYPE_UC; + if (gc_ip_version < IP_VERSION(9, 5, 0)) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; } break; case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): - if (domain == SVM_RANGE_VRAM_DOMAIN) { - if (bo_node != node) - mapping_flags |= AMDGPU_VM_MTYPE_NC; - } else { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - } + mapping_flags |= AMDGPU_VM_MTYPE_NC; break; default: mapping_flags |= coherent ? @@ -1299,7 +1304,7 @@ svm_range_get_pte_flags(struct kfd_node *node, pte_flags = AMDGPU_PTE_VALID; pte_flags |= (domain == SVM_RANGE_VRAM_DOMAIN) ? 0 : AMDGPU_PTE_SYSTEM; pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0; - if (KFD_GC_VERSION(node) >= IP_VERSION(12, 0, 0)) + if (gc_ip_version >= IP_VERSION(12, 0, 0)) pte_flags |= AMDGPU_PTE_IS_PTE; pte_flags |= amdgpu_gem_va_map_flags(node->adev, mapping_flags); @@ -2681,7 +2686,7 @@ svm_range_best_restore_location(struct svm_range *prange, return -1; } - if (node->adev->flags & AMD_IS_APU) + if (node->adev->apu_prefer_gtt) return 0; if (prange->preferred_loc == gpuid || @@ -2969,7 +2974,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, return -EFAULT; } - p = kfd_lookup_process_by_pasid(pasid); + p = kfd_lookup_process_by_pasid(pasid, NULL); if (!p) { pr_debug("kfd process not founded pasid 0x%x\n", pasid); return 0; @@ -2998,19 +3003,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, goto out; } - /* check if this page fault time stamp is before svms->checkpoint_ts */ - if (svms->checkpoint_ts[gpuidx] != 0) { - if (amdgpu_ih_ts_after(ts, svms->checkpoint_ts[gpuidx])) { - pr_debug("draining retry fault, drop fault 0x%llx\n", addr); - r = 0; - goto out; - } else - /* ts is after svms->checkpoint_ts now, reset svms->checkpoint_ts - * to zero to avoid following ts wrap around give wrong comparing - */ - svms->checkpoint_ts[gpuidx] = 0; - } - if (!p->xnack_enabled) { pr_debug("XNACK not enabled for pasid 0x%x\n", pasid); r = -EFAULT; @@ -3030,6 +3022,21 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, mmap_read_lock(mm); retry_write_locked: mutex_lock(&svms->lock); + + /* check if this page fault time stamp is before svms->checkpoint_ts */ + if (svms->checkpoint_ts[gpuidx] != 0) { + if (amdgpu_ih_ts_after_or_equal(ts, svms->checkpoint_ts[gpuidx])) { + pr_debug("draining retry fault, drop fault 0x%llx\n", addr); + r = -EAGAIN; + goto out_unlock_svms; + } else { + /* ts is after svms->checkpoint_ts now, reset svms->checkpoint_ts + * to zero to avoid following ts wrap around give wrong comparing + */ + svms->checkpoint_ts[gpuidx] = 0; + } + } + prange = svm_range_from_addr(svms, addr, NULL); if (!prange) { pr_debug("failed to find prange svms 0x%p address [0x%llx]\n", @@ -3155,7 +3162,8 @@ out_unlock_svms: mutex_unlock(&svms->lock); mmap_read_unlock(mm); - svm_range_count_fault(node, p, gpuidx); + if (r != -EAGAIN) + svm_range_count_fault(node, p, gpuidx); mmput(mm); out: @@ -3232,7 +3240,8 @@ void svm_range_list_fini(struct kfd_process *p) struct svm_range *prange; struct svm_range *next; - pr_debug("pasid 0x%x svms 0x%p\n", p->pasid, &p->svms); + pr_debug("process pid %d svms 0x%p\n", p->lead_thread->pid, + &p->svms); cancel_delayed_work_sync(&p->svms.restore_work); @@ -3255,7 +3264,8 @@ void svm_range_list_fini(struct kfd_process *p) mutex_destroy(&p->svms.lock); - pr_debug("pasid 0x%x svms 0x%p done\n", p->pasid, &p->svms); + pr_debug("process pid %d svms 0x%p done\n", + p->lead_thread->pid, &p->svms); } int svm_range_list_init(struct kfd_process *p) @@ -3428,7 +3438,7 @@ svm_range_best_prefetch_location(struct svm_range *prange) goto out; } - if (bo_node->adev->flags & AMD_IS_APU) { + if (bo_node->adev->apu_prefer_gtt) { best_loc = 0; goto out; } @@ -3618,8 +3628,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, bool flush_tlb; int r, ret = 0; - pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n", - p->pasid, &p->svms, start, start + size - 1, size); + pr_debug("process pid %d svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n", + p->lead_thread->pid, &p->svms, start, start + size - 1, size); r = svm_range_check_attr(p, nattr, attrs); if (r) @@ -3727,8 +3737,8 @@ out_unlock_range: out: mutex_unlock(&process_info->lock); - pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid, - &p->svms, start, start + size - 1, r); + pr_debug("process pid %d svms 0x%p [0x%llx 0x%llx] done, r=%d\n", + p->lead_thread->pid, &p->svms, start, start + size - 1, r); return ret ? ret : r; } |