summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJason Gunthorpe <jgg@mellanox.com>2019-11-12 16:22:26 -0400
committerJason Gunthorpe <jgg@mellanox.com>2019-11-23 19:56:44 -0400
commit20fef4ef84bfb07d3658ff1347951c489fe7bafa (patch)
treebbd869d0d061cb4f354b0a7a78bdbd03e201683b
parentc625c274ee0058c2a04aa0d794314b5b277606b2 (diff)
downloadlwn-20fef4ef84bfb07d3658ff1347951c489fe7bafa.tar.gz
lwn-20fef4ef84bfb07d3658ff1347951c489fe7bafa.zip
nouveau: use mmu_interval_notifier instead of hmm_mirror
Remove the hmm_mirror object and use the mmu_interval_notifier API instead for the range, and use the normal mmu_notifier API for the general invalidation callback. While here re-organize the pagefault path so the locking pattern is clear. nouveau is the only driver that uses a temporary range object and instead forwards nearly every invalidation range directly to the HW. While this is not how the mmu_interval_notifier was intended to be used, the overheads on the pagefaulting path are similar to the existing hmm_mirror version. Particularly since the interval tree will be small. Link: https://lore.kernel.org/r/20191112202231.3856-10-jgg@ziepe.ca Tested-by: Ralph Campbell <rcampbell@nvidia.com> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_svm.c179
1 files changed, 99 insertions, 80 deletions
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
index 577f8811925a..df9bf1fd1bc0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -96,8 +96,6 @@ struct nouveau_svmm {
} unmanaged;
struct mutex mutex;
-
- struct hmm_mirror mirror;
};
#define SVMM_DBG(s,f,a...) \
@@ -293,23 +291,11 @@ static const struct mmu_notifier_ops nouveau_mn_ops = {
.free_notifier = nouveau_svmm_free_notifier,
};
-static int
-nouveau_svmm_sync_cpu_device_pagetables(struct hmm_mirror *mirror,
- const struct mmu_notifier_range *update)
-{
- return 0;
-}
-
-static const struct hmm_mirror_ops nouveau_svmm = {
- .sync_cpu_device_pagetables = nouveau_svmm_sync_cpu_device_pagetables,
-};
-
void
nouveau_svmm_fini(struct nouveau_svmm **psvmm)
{
struct nouveau_svmm *svmm = *psvmm;
if (svmm) {
- hmm_mirror_unregister(&svmm->mirror);
mutex_lock(&svmm->mutex);
svmm->vmm = NULL;
mutex_unlock(&svmm->mutex);
@@ -357,15 +343,10 @@ nouveau_svmm_init(struct drm_device *dev, void *data,
goto out_free;
down_write(&current->mm->mmap_sem);
- svmm->mirror.ops = &nouveau_svmm;
- ret = hmm_mirror_register(&svmm->mirror, current->mm);
- if (ret)
- goto out_mm_unlock;
-
svmm->notifier.ops = &nouveau_mn_ops;
ret = __mmu_notifier_register(&svmm->notifier, current->mm);
if (ret)
- goto out_hmm_unregister;
+ goto out_mm_unlock;
/* Note, ownership of svmm transfers to mmu_notifier */
cli->svm.svmm = svmm;
@@ -374,8 +355,6 @@ nouveau_svmm_init(struct drm_device *dev, void *data,
mutex_unlock(&cli->mutex);
return 0;
-out_hmm_unregister:
- hmm_mirror_unregister(&svmm->mirror);
out_mm_unlock:
up_write(&current->mm->mmap_sem);
out_free:
@@ -503,43 +482,90 @@ nouveau_svm_fault_cache(struct nouveau_svm *svm,
fault->inst, fault->addr, fault->access);
}
-static inline bool
-nouveau_range_done(struct hmm_range *range)
+struct svm_notifier {
+ struct mmu_interval_notifier notifier;
+ struct nouveau_svmm *svmm;
+};
+
+static bool nouveau_svm_range_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
- bool ret = hmm_range_valid(range);
+ struct svm_notifier *sn =
+ container_of(mni, struct svm_notifier, notifier);
- hmm_range_unregister(range);
- return ret;
+ /*
+ * serializes the update to mni->invalidate_seq done by caller and
+ * prevents invalidation of the PTE from progressing while HW is being
+ * programmed. This is very hacky and only works because the normal
+ * notifier that does invalidation is always called after the range
+ * notifier.
+ */
+ if (mmu_notifier_range_blockable(range))
+ mutex_lock(&sn->svmm->mutex);
+ else if (!mutex_trylock(&sn->svmm->mutex))
+ return false;
+ mmu_interval_set_seq(mni, cur_seq);
+ mutex_unlock(&sn->svmm->mutex);
+ return true;
}
-static int
-nouveau_range_fault(struct nouveau_svmm *svmm, struct hmm_range *range)
+static const struct mmu_interval_notifier_ops nouveau_svm_mni_ops = {
+ .invalidate = nouveau_svm_range_invalidate,
+};
+
+static int nouveau_range_fault(struct nouveau_svmm *svmm,
+ struct nouveau_drm *drm, void *data, u32 size,
+ u64 *pfns, struct svm_notifier *notifier)
{
+ unsigned long timeout =
+ jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+ /* Have HMM fault pages within the fault window to the GPU. */
+ struct hmm_range range = {
+ .notifier = &notifier->notifier,
+ .start = notifier->notifier.interval_tree.start,
+ .end = notifier->notifier.interval_tree.last + 1,
+ .pfns = pfns,
+ .flags = nouveau_svm_pfn_flags,
+ .values = nouveau_svm_pfn_values,
+ .pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT,
+ };
+ struct mm_struct *mm = notifier->notifier.mm;
long ret;
- range->default_flags = 0;
- range->pfn_flags_mask = -1UL;
+ while (true) {
+ if (time_after(jiffies, timeout))
+ return -EBUSY;
- ret = hmm_range_register(range, &svmm->mirror);
- if (ret) {
- up_read(&svmm->notifier.mm->mmap_sem);
- return (int)ret;
- }
+ range.notifier_seq = mmu_interval_read_begin(range.notifier);
+ range.default_flags = 0;
+ range.pfn_flags_mask = -1UL;
+ down_read(&mm->mmap_sem);
+ ret = hmm_range_fault(&range, 0);
+ up_read(&mm->mmap_sem);
+ if (ret <= 0) {
+ if (ret == 0 || ret == -EBUSY)
+ continue;
+ return ret;
+ }
- if (!hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT)) {
- up_read(&svmm->notifier.mm->mmap_sem);
- return -EBUSY;
+ mutex_lock(&svmm->mutex);
+ if (mmu_interval_read_retry(range.notifier,
+ range.notifier_seq)) {
+ mutex_unlock(&svmm->mutex);
+ continue;
+ }
+ break;
}
- ret = hmm_range_fault(range, 0);
- if (ret <= 0) {
- if (ret == 0)
- ret = -EBUSY;
- up_read(&svmm->notifier.mm->mmap_sem);
- hmm_range_unregister(range);
- return ret;
- }
- return 0;
+ nouveau_dmem_convert_pfn(drm, &range);
+
+ svmm->vmm->vmm.object.client->super = true;
+ ret = nvif_object_ioctl(&svmm->vmm->vmm.object, data, size, NULL);
+ svmm->vmm->vmm.object.client->super = false;
+ mutex_unlock(&svmm->mutex);
+
+ return ret;
}
static int
@@ -559,7 +585,6 @@ nouveau_svm_fault(struct nvif_notify *notify)
} i;
u64 phys[16];
} args;
- struct hmm_range range;
struct vm_area_struct *vma;
u64 inst, start, limit;
int fi, fn, pi, fill;
@@ -615,6 +640,7 @@ nouveau_svm_fault(struct nvif_notify *notify)
args.i.p.version = 0;
for (fi = 0; fn = fi + 1, fi < buffer->fault_nr; fi = fn) {
+ struct svm_notifier notifier;
struct mm_struct *mm;
/* Cancel any faults from non-SVM channels. */
@@ -623,7 +649,6 @@ nouveau_svm_fault(struct nvif_notify *notify)
continue;
}
SVMM_DBG(svmm, "addr %016llx", buffer->fault[fi]->addr);
- mm = svmm->notifier.mm;
/* We try and group handling of faults within a small
* window into a single update.
@@ -637,6 +662,12 @@ nouveau_svm_fault(struct nvif_notify *notify)
start = max_t(u64, start, svmm->unmanaged.limit);
SVMM_DBG(svmm, "wndw %016llx-%016llx", start, limit);
+ mm = svmm->notifier.mm;
+ if (!mmget_not_zero(mm)) {
+ nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
+ continue;
+ }
+
/* Intersect fault window with the CPU VMA, cancelling
* the fault if the address is invalid.
*/
@@ -645,16 +676,18 @@ nouveau_svm_fault(struct nvif_notify *notify)
if (!vma) {
SVMM_ERR(svmm, "wndw %016llx-%016llx", start, limit);
up_read(&mm->mmap_sem);
+ mmput(mm);
nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
continue;
}
start = max_t(u64, start, vma->vm_start);
limit = min_t(u64, limit, vma->vm_end);
+ up_read(&mm->mmap_sem);
SVMM_DBG(svmm, "wndw %016llx-%016llx", start, limit);
if (buffer->fault[fi]->addr != start) {
SVMM_ERR(svmm, "addr %016llx", buffer->fault[fi]->addr);
- up_read(&mm->mmap_sem);
+ mmput(mm);
nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
continue;
}
@@ -710,33 +743,19 @@ nouveau_svm_fault(struct nvif_notify *notify)
args.i.p.addr,
args.i.p.addr + args.i.p.size, fn - fi);
- /* Have HMM fault pages within the fault window to the GPU. */
- range.start = args.i.p.addr;
- range.end = args.i.p.addr + args.i.p.size;
- range.pfns = args.phys;
- range.flags = nouveau_svm_pfn_flags;
- range.values = nouveau_svm_pfn_values;
- range.pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT;
-again:
- ret = nouveau_range_fault(svmm, &range);
- if (ret == 0) {
- mutex_lock(&svmm->mutex);
- if (!nouveau_range_done(&range)) {
- mutex_unlock(&svmm->mutex);
- goto again;
- }
-
- nouveau_dmem_convert_pfn(svm->drm, &range);
-
- svmm->vmm->vmm.object.client->super = true;
- ret = nvif_object_ioctl(&svmm->vmm->vmm.object,
- &args, sizeof(args.i) +
- pi * sizeof(args.phys[0]),
- NULL);
- svmm->vmm->vmm.object.client->super = false;
- mutex_unlock(&svmm->mutex);
- up_read(&mm->mmap_sem);
+ notifier.svmm = svmm;
+ ret = mmu_interval_notifier_insert(&notifier.notifier,
+ svmm->notifier.mm,
+ args.i.p.addr, args.i.p.size,
+ &nouveau_svm_mni_ops);
+ if (!ret) {
+ ret = nouveau_range_fault(
+ svmm, svm->drm, &args,
+ sizeof(args.i) + pi * sizeof(args.phys[0]),
+ args.phys, &notifier);
+ mmu_interval_notifier_remove(&notifier.notifier);
}
+ mmput(mm);
/* Cancel any faults in the window whose pages didn't manage
* to keep their valid bit, or stay writeable when required.
@@ -745,10 +764,10 @@ again:
*/
while (fi < fn) {
struct nouveau_svm_fault *fault = buffer->fault[fi++];
- pi = (fault->addr - range.start) >> PAGE_SHIFT;
+ pi = (fault->addr - args.i.p.addr) >> PAGE_SHIFT;
if (ret ||
- !(range.pfns[pi] & NVIF_VMM_PFNMAP_V0_V) ||
- (!(range.pfns[pi] & NVIF_VMM_PFNMAP_V0_W) &&
+ !(args.phys[pi] & NVIF_VMM_PFNMAP_V0_V) ||
+ (!(args.phys[pi] & NVIF_VMM_PFNMAP_V0_W) &&
fault->access != 0 && fault->access != 3)) {
nouveau_svm_fault_cancel_fault(svm, fault);
continue;