From ddfaed17a779ddb728e5534a87596950842d6b04 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Mar 2020 14:53:06 +0100 Subject: mm/hmm: don't provide a stub for hmm_range_fault() All callers of hmm_range_fault depend on CONFIG_HMM_MIRROR, so don't bother with a stub. Link: https://lore.kernel.org/r/20200316135310.899364-2-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Zi Yan Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- include/linux/hmm.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index ddf9f7144c43..c102e359b59d 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -225,17 +225,10 @@ static inline uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range, /* Don't fault in missing PTEs, just snapshot the current state. */ #define HMM_FAULT_SNAPSHOT (1 << 1) -#ifdef CONFIG_HMM_MIRROR /* * Please see Documentation/vm/hmm.rst for how to use the range API. */ long hmm_range_fault(struct hmm_range *range, unsigned int flags); -#else -static inline long hmm_range_fault(struct hmm_range *range, unsigned int flags) -{ - return -EOPNOTSUPP; -} -#endif /* * HMM_RANGE_DEFAULT_TIMEOUT - default timeout (ms) when waiting for a range -- cgit v1.2.3 From 96268163f9c9443e7f73a202253f68566f93dc79 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Mar 2020 14:53:07 +0100 Subject: mm/hmm: remove the unused HMM_FAULT_ALLOW_RETRY flag The HMM_FAULT_ALLOW_RETRY isn't used anywhere in the tree. Remove it and the weird -EAGAIN handling where handle_mm_fault() drops the mmap_sem. Link: https://lore.kernel.org/r/20200316135310.899364-3-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- include/linux/hmm.h | 5 ----- mm/hmm.c | 7 ------- 2 files changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index c102e359b59d..4bf8d6997b12 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -217,11 +217,6 @@ static inline uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range, range->flags[HMM_PFN_VALID]; } -/* - * Retry fault if non-blocking, drop mmap_sem and return -EAGAIN in that case. - */ -#define HMM_FAULT_ALLOW_RETRY (1 << 0) - /* Don't fault in missing PTEs, just snapshot the current state. */ #define HMM_FAULT_SNAPSHOT (1 << 1) diff --git a/mm/hmm.c b/mm/hmm.c index 37a6fca7da43..df98297afe80 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -45,16 +45,10 @@ static int hmm_vma_do_fault(struct mm_walk *walk, unsigned long addr, if (!vma) goto err; - if (hmm_vma_walk->flags & HMM_FAULT_ALLOW_RETRY) - flags |= FAULT_FLAG_ALLOW_RETRY; if (write_fault) flags |= FAULT_FLAG_WRITE; ret = handle_mm_fault(vma, addr, flags); - if (ret & VM_FAULT_RETRY) { - /* Note, handle_mm_fault did up_read(&mm->mmap_sem)) */ - return -EAGAIN; - } if (ret & VM_FAULT_ERROR) goto err; @@ -662,7 +656,6 @@ static const struct mm_walk_ops hmm_walk_ops = { * -ENOMEM: Out of memory. * -EPERM: Invalid permission (e.g., asking for write and range is read * only). - * -EAGAIN: A page fault needs to be retried and mmap_sem was dropped. * -EBUSY: The range has been invalidated and the caller needs to wait for * the invalidation to finish. * -EFAULT: Invalid (i.e., either no valid vma or it is illegal to access -- cgit v1.2.3 From f894ddd5ff01d3bb48faf4dc533536bf5269c17a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Mar 2020 20:32:13 +0100 Subject: memremap: add an owner field to struct dev_pagemap Add a new opaque owner field to struct dev_pagemap, which will allow the hmm and migrate_vma code to identify who owns ZONE_DEVICE memory, and refuse to work on mappings not owned by the calling entity. Link: https://lore.kernel.org/r/20200316193216.920734-2-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Ralph Campbell Tested-by: Bharata B Rao Signed-off-by: Jason Gunthorpe --- arch/powerpc/kvm/book3s_hv_uvmem.c | 2 ++ drivers/gpu/drm/nouveau/nouveau_dmem.c | 1 + include/linux/memremap.h | 4 ++++ mm/memremap.c | 4 ++++ 4 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 79b1202b1c62..67fefb03b9b7 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -779,6 +779,8 @@ int kvmppc_uvmem_init(void) kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE; kvmppc_uvmem_pgmap.res = *res; kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops; + /* just one global instance: */ + kvmppc_uvmem_pgmap.owner = &kvmppc_uvmem_pgmap; addr = memremap_pages(&kvmppc_uvmem_pgmap, NUMA_NO_NODE); if (IS_ERR(addr)) { ret = PTR_ERR(addr); diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 0ad5d87b5a8e..a4682272586e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -526,6 +526,7 @@ nouveau_dmem_init(struct nouveau_drm *drm) drm->dmem->pagemap.type = MEMORY_DEVICE_PRIVATE; drm->dmem->pagemap.res = *res; drm->dmem->pagemap.ops = &nouveau_dmem_pagemap_ops; + drm->dmem->pagemap.owner = drm->dev; if (IS_ERR(devm_memremap_pages(device, &drm->dmem->pagemap))) goto out_free; diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 6fefb09af7c3..60d97e8fd3c0 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -103,6 +103,9 @@ struct dev_pagemap_ops { * @type: memory type: see MEMORY_* in memory_hotplug.h * @flags: PGMAP_* flags to specify defailed behavior * @ops: method table + * @owner: an opaque pointer identifying the entity that manages this + * instance. Used by various helpers to make sure that no + * foreign ZONE_DEVICE memory is accessed. */ struct dev_pagemap { struct vmem_altmap altmap; @@ -113,6 +116,7 @@ struct dev_pagemap { enum memory_type type; unsigned int flags; const struct dev_pagemap_ops *ops; + void *owner; }; static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap) diff --git a/mm/memremap.c b/mm/memremap.c index 09b5b7adc773..9b2c97ceb775 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -181,6 +181,10 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) WARN(1, "Missing migrate_to_ram method\n"); return ERR_PTR(-EINVAL); } + if (!pgmap->owner) { + WARN(1, "Missing owner\n"); + return ERR_PTR(-EINVAL); + } break; case MEMORY_DEVICE_FS_DAX: if (!IS_ENABLED(CONFIG_ZONE_DEVICE) || -- cgit v1.2.3 From 800bb1c8dc80bb4121446b56813067f3ea44edee Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Mar 2020 20:32:14 +0100 Subject: mm: handle multiple owners of device private pages in migrate_vma Add a new src_owner field to struct migrate_vma. If the field is set, only device private pages with page->pgmap->owner equal to that field are migrated. If the field is not set only "normal" pages are migrated. Fixes: df6ad69838fc ("mm/device-public-memory: device memory cache coherent with CPU") Link: https://lore.kernel.org/r/20200316193216.920734-3-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Ralph Campbell Tested-by: Bharata B Rao Signed-off-by: Jason Gunthorpe --- arch/powerpc/kvm/book3s_hv_uvmem.c | 1 + drivers/gpu/drm/nouveau/nouveau_dmem.c | 1 + include/linux/migrate.h | 8 ++++++++ mm/migrate.c | 9 ++++++--- 4 files changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 67fefb03b9b7..f44f6b27950f 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -563,6 +563,7 @@ kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start, mig.end = end; mig.src = &src_pfn; mig.dst = &dst_pfn; + mig.src_owner = &kvmppc_uvmem_pgmap; mutex_lock(&kvm->arch.uvmem_lock); /* The requested page is already paged-out, nothing to do */ diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index a4682272586e..0e36345d395c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -176,6 +176,7 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf) .end = vmf->address + PAGE_SIZE, .src = &src, .dst = &dst, + .src_owner = drm->dev, }; /* diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 72120061b7d4..3e546cbf03dd 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -196,6 +196,14 @@ struct migrate_vma { unsigned long npages; unsigned long start; unsigned long end; + + /* + * Set to the owner value also stored in page->pgmap->owner for + * migrating out of device private memory. If set only device + * private pages with this owner are migrated. If not set + * device private pages are not migrated at all. + */ + void *src_owner; }; int migrate_vma_setup(struct migrate_vma *args); diff --git a/mm/migrate.c b/mm/migrate.c index b1092876e537..7605d2c23433 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2241,7 +2241,7 @@ again: arch_enter_lazy_mmu_mode(); for (; addr < end; addr += PAGE_SIZE, ptep++) { - unsigned long mpfn, pfn; + unsigned long mpfn = 0, pfn; struct page *page; swp_entry_t entry; pte_t pte; @@ -2255,8 +2255,6 @@ again: } if (!pte_present(pte)) { - mpfn = 0; - /* * Only care about unaddressable device page special * page table entry. Other special swap entries are not @@ -2267,11 +2265,16 @@ again: goto next; page = device_private_entry_to_page(entry); + if (page->pgmap->owner != migrate->src_owner) + goto next; + mpfn = migrate_pfn(page_to_pfn(page)) | MIGRATE_PFN_MIGRATE; if (is_write_device_private_entry(entry)) mpfn |= MIGRATE_PFN_WRITE; } else { + if (migrate->src_owner) + goto next; pfn = pte_pfn(pte); if (is_zero_pfn(pfn)) { mpfn = MIGRATE_PFN_MIGRATE; -- cgit v1.2.3 From 17ffdc482982af92bddb59692af1c5e1de23d184 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Mar 2020 20:32:15 +0100 Subject: mm: simplify device private page handling in hmm_range_fault Remove the HMM_PFN_DEVICE_PRIVATE flag, no driver has ever set this flag on input, and the only place that uses it on output can be trivially changed to use is_device_private_page(). This removes the ability to request that device_private pages are faulted back into system memory. Link: https://lore.kernel.org/r/20200316193216.920734-4-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 1 - drivers/gpu/drm/nouveau/nouveau_dmem.c | 5 +++-- drivers/gpu/drm/nouveau/nouveau_svm.c | 1 - include/linux/hmm.h | 2 -- mm/hmm.c | 25 +++++-------------------- 5 files changed, 8 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index dee446278417..90821ce5e6ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -776,7 +776,6 @@ struct amdgpu_ttm_tt { static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { (1 << 0), /* HMM_PFN_VALID */ (1 << 1), /* HMM_PFN_WRITE */ - 0 /* HMM_PFN_DEVICE_PRIVATE */ }; static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 0e36345d395c..edfd0805fba4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -28,6 +28,7 @@ #include #include +#include #include #include @@ -692,9 +693,8 @@ nouveau_dmem_convert_pfn(struct nouveau_drm *drm, if (page == NULL) continue; - if (!(range->pfns[i] & range->flags[HMM_PFN_DEVICE_PRIVATE])) { + if (!is_device_private_page(page)) continue; - } if (!nouveau_dmem_page(drm, page)) { WARN(1, "Some unknown device memory !\n"); @@ -705,5 +705,6 @@ nouveau_dmem_convert_pfn(struct nouveau_drm *drm, addr = nouveau_dmem_page_addr(page); range->pfns[i] &= ((1UL << range->pfn_shift) - 1); range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift; + range->pfns[i] |= NVIF_VMM_PFNMAP_V0_VRAM; } } diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index df9bf1fd1bc0..39c731a99937 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -367,7 +367,6 @@ static const u64 nouveau_svm_pfn_flags[HMM_PFN_FLAG_MAX] = { [HMM_PFN_VALID ] = NVIF_VMM_PFNMAP_V0_V, [HMM_PFN_WRITE ] = NVIF_VMM_PFNMAP_V0_W, - [HMM_PFN_DEVICE_PRIVATE] = NVIF_VMM_PFNMAP_V0_VRAM, }; static const u64 diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 4bf8d6997b12..5e6034f105c3 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -74,7 +74,6 @@ * Flags: * HMM_PFN_VALID: pfn is valid. It has, at least, read permission. * HMM_PFN_WRITE: CPU page table has write permission set - * HMM_PFN_DEVICE_PRIVATE: private device memory (ZONE_DEVICE) * * The driver provides a flags array for mapping page protections to device * PTE bits. If the driver valid bit for an entry is bit 3, @@ -86,7 +85,6 @@ enum hmm_pfn_flag_e { HMM_PFN_VALID = 0, HMM_PFN_WRITE, - HMM_PFN_DEVICE_PRIVATE, HMM_PFN_FLAG_MAX }; diff --git a/mm/hmm.c b/mm/hmm.c index a5f4f8010965..613b34950c30 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -116,15 +116,6 @@ static inline void hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk, /* We aren't ask to do anything ... */ if (!(pfns & range->flags[HMM_PFN_VALID])) return; - /* If this is device memory then only fault if explicitly requested */ - if ((cpu_flags & range->flags[HMM_PFN_DEVICE_PRIVATE])) { - /* Do we fault on device memory ? */ - if (pfns & range->flags[HMM_PFN_DEVICE_PRIVATE]) { - *write_fault = pfns & range->flags[HMM_PFN_WRITE]; - *fault = true; - } - return; - } /* If CPU page table is not valid then we need to fault */ *fault = !(cpu_flags & range->flags[HMM_PFN_VALID]); @@ -262,21 +253,15 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, swp_entry_t entry = pte_to_swp_entry(pte); /* - * This is a special swap entry, ignore migration, use - * device and report anything else as error. + * Never fault in device private pages pages, but just report + * the PFN even if not present. */ if (is_device_private_entry(entry)) { - cpu_flags = range->flags[HMM_PFN_VALID] | - range->flags[HMM_PFN_DEVICE_PRIVATE]; - cpu_flags |= is_write_device_private_entry(entry) ? - range->flags[HMM_PFN_WRITE] : 0; - hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags, - &fault, &write_fault); - if (fault || write_fault) - goto fault; *pfn = hmm_device_entry_from_pfn(range, swp_offset(entry)); - *pfn |= cpu_flags; + *pfn |= range->flags[HMM_PFN_VALID]; + if (is_write_device_private_entry(entry)) + *pfn |= range->flags[HMM_PFN_WRITE]; return 0; } -- cgit v1.2.3 From 08ddddda667b3b7aaac10641418283f78118c5cd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Mar 2020 20:32:16 +0100 Subject: mm/hmm: check the device private page owner in hmm_range_fault() hmm_range_fault() will succeed for any kind of device private memory, even if it doesn't belong to the calling entity. While nouveau has some crude checks for that, they are broken because they assume nouveau is the only user of device private memory. Fix this by passing in an expected pgmap owner in the hmm_range_fault structure. If a device_private page is found and doesn't match the owner then it is treated as an non-present and non-faultable page. This prevents a bug in amdgpu, where it doesn't know how to handle device_private pages, but hmm_range_fault would return them anyhow. Fixes: 4ef589dc9b10 ("mm/hmm/devmem: device memory hotplug using ZONE_DEVICE") Link: https://lore.kernel.org/r/20200316193216.920734-5-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Jason Gunthorpe Reviewed-by: Ralph Campbell Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/nouveau/nouveau_dmem.c | 12 ------------ include/linux/hmm.h | 2 ++ mm/hmm.c | 10 +++++++++- 3 files changed, 11 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index edfd0805fba4..ad89e09a0be3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -672,12 +672,6 @@ out: return ret; } -static inline bool -nouveau_dmem_page(struct nouveau_drm *drm, struct page *page) -{ - return is_device_private_page(page) && drm->dmem == page_to_dmem(page); -} - void nouveau_dmem_convert_pfn(struct nouveau_drm *drm, struct hmm_range *range) @@ -696,12 +690,6 @@ nouveau_dmem_convert_pfn(struct nouveau_drm *drm, if (!is_device_private_page(page)) continue; - if (!nouveau_dmem_page(drm, page)) { - WARN(1, "Some unknown device memory !\n"); - range->pfns[i] = 0; - continue; - } - addr = nouveau_dmem_page_addr(page); range->pfns[i] &= ((1UL << range->pfn_shift) - 1); range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift; diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 5e6034f105c3..bb6be4428633 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -132,6 +132,7 @@ enum hmm_pfn_value_e { * @pfn_flags_mask: allows to mask pfn flags so that only default_flags matter * @pfn_shifts: pfn shift value (should be <= PAGE_SHIFT) * @valid: pfns array did not change since it has been fill by an HMM function + * @dev_private_owner: owner of device private pages */ struct hmm_range { struct mmu_interval_notifier *notifier; @@ -144,6 +145,7 @@ struct hmm_range { uint64_t default_flags; uint64_t pfn_flags_mask; uint8_t pfn_shift; + void *dev_private_owner; }; /* diff --git a/mm/hmm.c b/mm/hmm.c index 613b34950c30..a491d9aaafe4 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -218,6 +218,14 @@ int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr, unsigned long end, uint64_t *pfns, pmd_t pmd); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +static inline bool hmm_is_device_private_entry(struct hmm_range *range, + swp_entry_t entry) +{ + return is_device_private_entry(entry) && + device_private_entry_to_page(entry)->pgmap->owner == + range->dev_private_owner; +} + static inline uint64_t pte_to_hmm_pfn_flags(struct hmm_range *range, pte_t pte) { if (pte_none(pte) || !pte_present(pte) || pte_protnone(pte)) @@ -256,7 +264,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, * Never fault in device private pages pages, but just report * the PFN even if not present. */ - if (is_device_private_entry(entry)) { + if (hmm_is_device_private_entry(range, entry)) { *pfn = hmm_device_entry_from_pfn(range, swp_offset(entry)); *pfn |= range->flags[HMM_PFN_VALID]; -- cgit v1.2.3 From f970b977e068aa54e6eaf916a964a0abaf028afe Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 27 Mar 2020 17:00:15 -0300 Subject: mm/hmm: remove unused code and tidy comments Delete several functions that are never called, fix some desync between comments and structure content, toss the now out of date top of file header, and move one function only used by hmm.c into hmm.c Link: https://lore.kernel.org/r/20200327200021.29372-4-jgg@ziepe.ca Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- include/linux/hmm.h | 104 +--------------------------------------------------- mm/hmm.c | 24 ++++++++---- 2 files changed, 19 insertions(+), 109 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index bb6be4428633..daee6508a3f6 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -3,58 +3,8 @@ * Copyright 2013 Red Hat Inc. * * Authors: Jérôme Glisse - */ -/* - * Heterogeneous Memory Management (HMM) - * - * See Documentation/vm/hmm.rst for reasons and overview of what HMM is and it - * is for. Here we focus on the HMM API description, with some explanation of - * the underlying implementation. - * - * Short description: HMM provides a set of helpers to share a virtual address - * space between CPU and a device, so that the device can access any valid - * address of the process (while still obeying memory protection). HMM also - * provides helpers to migrate process memory to device memory, and back. Each - * set of functionality (address space mirroring, and migration to and from - * device memory) can be used independently of the other. - * - * - * HMM address space mirroring API: - * - * Use HMM address space mirroring if you want to mirror a range of the CPU - * page tables of a process into a device page table. Here, "mirror" means "keep - * synchronized". Prerequisites: the device must provide the ability to write- - * protect its page tables (at PAGE_SIZE granularity), and must be able to - * recover from the resulting potential page faults. * - * HMM guarantees that at any point in time, a given virtual address points to - * either the same memory in both CPU and device page tables (that is: CPU and - * device page tables each point to the same pages), or that one page table (CPU - * or device) points to no entry, while the other still points to the old page - * for the address. The latter case happens when the CPU page table update - * happens first, and then the update is mirrored over to the device page table. - * This does not cause any issue, because the CPU page table cannot start - * pointing to a new page until the device page table is invalidated. - * - * HMM uses mmu_notifiers to monitor the CPU page tables, and forwards any - * updates to each device driver that has registered a mirror. It also provides - * some API calls to help with taking a snapshot of the CPU page table, and to - * synchronize with any updates that might happen concurrently. - * - * - * HMM migration to and from device memory: - * - * HMM provides a set of helpers to hotplug device memory as ZONE_DEVICE, with - * a new MEMORY_DEVICE_PRIVATE type. This provides a struct page for each page - * of the device memory, and allows the device driver to manage its memory - * using those struct pages. Having struct pages for device memory makes - * migration easier. Because that memory is not addressable by the CPU it must - * never be pinned to the device; in other words, any CPU page fault can always - * cause the device memory to be migrated (copied/moved) back to regular memory. - * - * A new migrate helper (migrate_vma()) has been added (see mm/migrate.c) that - * allows use of a device DMA engine to perform the copy operation between - * regular system memory and device memory. + * See Documentation/vm/hmm.rst for reasons and overview of what HMM is. */ #ifndef LINUX_HMM_H #define LINUX_HMM_H @@ -120,9 +70,6 @@ enum hmm_pfn_value_e { * * @notifier: a mmu_interval_notifier that includes the start/end * @notifier_seq: result of mmu_interval_read_begin() - * @hmm: the core HMM structure this range is active against - * @vma: the vm area struct for the range - * @list: all range lock are on a list * @start: range virtual start address (inclusive) * @end: range virtual end address (exclusive) * @pfns: array of pfns (big enough for the range) @@ -130,8 +77,7 @@ enum hmm_pfn_value_e { * @values: pfn value for some special case (none, special, error, ...) * @default_flags: default flags for the range (write, read, ... see hmm doc) * @pfn_flags_mask: allows to mask pfn flags so that only default_flags matter - * @pfn_shifts: pfn shift value (should be <= PAGE_SHIFT) - * @valid: pfns array did not change since it has been fill by an HMM function + * @pfn_shift: pfn shift value (should be <= PAGE_SHIFT) * @dev_private_owner: owner of device private pages */ struct hmm_range { @@ -171,52 +117,6 @@ static inline struct page *hmm_device_entry_to_page(const struct hmm_range *rang return pfn_to_page(entry >> range->pfn_shift); } -/* - * hmm_device_entry_to_pfn() - return pfn value store in a device entry - * @range: range use to decode device entry value - * @entry: device entry to extract pfn from - * Return: pfn value if device entry is valid, -1UL otherwise - */ -static inline unsigned long -hmm_device_entry_to_pfn(const struct hmm_range *range, uint64_t pfn) -{ - if (pfn == range->values[HMM_PFN_NONE]) - return -1UL; - if (pfn == range->values[HMM_PFN_ERROR]) - return -1UL; - if (pfn == range->values[HMM_PFN_SPECIAL]) - return -1UL; - if (!(pfn & range->flags[HMM_PFN_VALID])) - return -1UL; - return (pfn >> range->pfn_shift); -} - -/* - * hmm_device_entry_from_page() - create a valid device entry for a page - * @range: range use to encode HMM pfn value - * @page: page for which to create the device entry - * Return: valid device entry for the page - */ -static inline uint64_t hmm_device_entry_from_page(const struct hmm_range *range, - struct page *page) -{ - return (page_to_pfn(page) << range->pfn_shift) | - range->flags[HMM_PFN_VALID]; -} - -/* - * hmm_device_entry_from_pfn() - create a valid device entry value from pfn - * @range: range use to encode HMM pfn value - * @pfn: pfn value for which to create the device entry - * Return: valid device entry for the pfn - */ -static inline uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range, - unsigned long pfn) -{ - return (pfn << range->pfn_shift) | - range->flags[HMM_PFN_VALID]; -} - /* Don't fault in missing PTEs, just snapshot the current state. */ #define HMM_FAULT_SNAPSHOT (1 << 1) diff --git a/mm/hmm.c b/mm/hmm.c index d208ddd35106..136de474221d 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -38,6 +38,18 @@ enum { HMM_NEED_ALL_BITS = HMM_NEED_FAULT | HMM_NEED_WRITE_FAULT, }; +/* + * hmm_device_entry_from_pfn() - create a valid device entry value from pfn + * @range: range use to encode HMM pfn value + * @pfn: pfn value for which to create the device entry + * Return: valid device entry for the pfn + */ +static uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range, + unsigned long pfn) +{ + return (pfn << range->pfn_shift) | range->flags[HMM_PFN_VALID]; +} + static int hmm_pfns_fill(unsigned long addr, unsigned long end, struct hmm_range *range, enum hmm_pfn_value_e value) { @@ -544,7 +556,7 @@ static const struct mm_walk_ops hmm_walk_ops = { /** * hmm_range_fault - try to fault some address in a virtual address range - * @range: range being faulted + * @range: argument structure * @flags: HMM_FAULT_* flags * * Return: the number of valid pages in range->pfns[] (from range start @@ -558,13 +570,11 @@ static const struct mm_walk_ops hmm_walk_ops = { * only). * -EBUSY: The range has been invalidated and the caller needs to wait for * the invalidation to finish. - * -EFAULT: Invalid (i.e., either no valid vma or it is illegal to access - * that range) number of valid pages in range->pfns[] (from - * range start address). + * -EFAULT: A page was requested to be valid and could not be made valid + * ie it has no backing VMA or it is illegal to access * - * This is similar to a regular CPU page fault except that it will not trigger - * any memory migration if the memory being faulted is not accessible by CPUs - * and caller does not ask for migration. + * This is similar to get_user_pages(), except that it can read the page tables + * without mutating them (ie causing faults). * * On error, for one virtual address in the range, the function will mark the * corresponding HMM pfn entry with an error flag. -- cgit v1.2.3 From 6bfef2f9194519ca23dee405a9f4db461a7a7826 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 27 Mar 2020 17:00:16 -0300 Subject: mm/hmm: remove HMM_FAULT_SNAPSHOT Now that flags are handled on a fine-grained per-page basis this global flag is redundant and has a confusing overlap with the pfn_flags_mask and default_flags. Normalize the HMM_FAULT_SNAPSHOT behavior into one place. Callers needing the SNAPSHOT behavior should set a pfn_flags_mask and default_flags that always results in a cleared HMM_PFN_VALID. Then no pages will be faulted, and HMM_FAULT_SNAPSHOT is not a special flow that overrides the masking mechanism. As this is the last flag, also remove the flags argument. If future flags are needed they can be part of the struct hmm_range function arguments. Link: https://lore.kernel.org/r/20200327200021.29372-5-jgg@ziepe.ca Reviewed-by: Christoph Hellwig Signed-off-by: Jason Gunthorpe --- Documentation/vm/hmm.rst | 12 +++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- drivers/gpu/drm/nouveau/nouveau_svm.c | 2 +- include/linux/hmm.h | 5 +---- mm/hmm.c | 17 +++++++++-------- 5 files changed, 17 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst index 95fec5968362..4e3e9362afeb 100644 --- a/Documentation/vm/hmm.rst +++ b/Documentation/vm/hmm.rst @@ -161,13 +161,11 @@ device must complete the update before the driver callback returns. When the device driver wants to populate a range of virtual addresses, it can use:: - long hmm_range_fault(struct hmm_range *range, unsigned int flags); + long hmm_range_fault(struct hmm_range *range); -With the HMM_RANGE_SNAPSHOT flag, it will only fetch present CPU page table -entries and will not trigger a page fault on missing or non-present entries. -Without that flag, it does trigger a page fault on missing or read-only entries -if write access is requested (see below). Page faults use the generic mm page -fault code path just like a CPU page fault. +It will trigger a page fault on missing or read-only entries if write access is +requested (see below). Page faults use the generic mm page fault code path just +like a CPU page fault. Both functions copy CPU page table entries into their pfns array argument. Each entry in that array corresponds to an address in the virtual range. HMM @@ -197,7 +195,7 @@ The usage pattern is:: again: range.notifier_seq = mmu_interval_read_begin(&interval_sub); down_read(&mm->mmap_sem); - ret = hmm_range_fault(&range, HMM_RANGE_SNAPSHOT); + ret = hmm_range_fault(&range); if (ret) { up_read(&mm->mmap_sem); if (ret == -EBUSY) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 90821ce5e6ca..c52029070937 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -856,7 +856,7 @@ retry: range->notifier_seq = mmu_interval_read_begin(&bo->notifier); down_read(&mm->mmap_sem); - r = hmm_range_fault(range, 0); + r = hmm_range_fault(range); up_read(&mm->mmap_sem); if (unlikely(r <= 0)) { /* diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 39c731a99937..e3797b2d4d17 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -540,7 +540,7 @@ static int nouveau_range_fault(struct nouveau_svmm *svmm, range.default_flags = 0; range.pfn_flags_mask = -1UL; down_read(&mm->mmap_sem); - ret = hmm_range_fault(&range, 0); + ret = hmm_range_fault(&range); up_read(&mm->mmap_sem); if (ret <= 0) { if (ret == 0 || ret == -EBUSY) diff --git a/include/linux/hmm.h b/include/linux/hmm.h index daee6508a3f6..7475051100c7 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -117,13 +117,10 @@ static inline struct page *hmm_device_entry_to_page(const struct hmm_range *rang return pfn_to_page(entry >> range->pfn_shift); } -/* Don't fault in missing PTEs, just snapshot the current state. */ -#define HMM_FAULT_SNAPSHOT (1 << 1) - /* * Please see Documentation/vm/hmm.rst for how to use the range API. */ -long hmm_range_fault(struct hmm_range *range, unsigned int flags); +long hmm_range_fault(struct hmm_range *range); /* * HMM_RANGE_DEFAULT_TIMEOUT - default timeout (ms) when waiting for a range diff --git a/mm/hmm.c b/mm/hmm.c index 136de474221d..8dbd9e1d0308 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -29,7 +29,6 @@ struct hmm_vma_walk { struct hmm_range *range; unsigned long last; - unsigned int flags; }; enum { @@ -112,9 +111,6 @@ static unsigned int hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk, { struct hmm_range *range = hmm_vma_walk->range; - if (hmm_vma_walk->flags & HMM_FAULT_SNAPSHOT) - return 0; - /* * So we not only consider the individual per page request we also * consider the default flags requested for the range. The API can @@ -147,10 +143,17 @@ hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk, const uint64_t *pfns, unsigned long npages, uint64_t cpu_flags) { + struct hmm_range *range = hmm_vma_walk->range; unsigned int required_fault = 0; unsigned long i; - if (hmm_vma_walk->flags & HMM_FAULT_SNAPSHOT) + /* + * If the default flags do not request to fault pages, and the mask does + * not allow for individual pages to be faulted, then + * hmm_pte_need_fault() will always return 0. + */ + if (!((range->default_flags | range->pfn_flags_mask) & + range->flags[HMM_PFN_VALID])) return 0; for (i = 0; i < npages; ++i) { @@ -557,7 +560,6 @@ static const struct mm_walk_ops hmm_walk_ops = { /** * hmm_range_fault - try to fault some address in a virtual address range * @range: argument structure - * @flags: HMM_FAULT_* flags * * Return: the number of valid pages in range->pfns[] (from range start * address), which may be zero. On error one of the following status codes @@ -579,12 +581,11 @@ static const struct mm_walk_ops hmm_walk_ops = { * On error, for one virtual address in the range, the function will mark the * corresponding HMM pfn entry with an error flag. */ -long hmm_range_fault(struct hmm_range *range, unsigned int flags) +long hmm_range_fault(struct hmm_range *range) { struct hmm_vma_walk hmm_vma_walk = { .range = range, .last = range->start, - .flags = flags, }; struct mm_struct *mm = range->notifier->mm; int ret; -- cgit v1.2.3