summary refs log tree commit diff
path: root/mm/internal.h
diff options
context:
space:
mode:
Diffstat (limited to 'mm/internal.h')
-rw-r--r-- mm/internal.h | 217
1 files changed, 195 insertions, 22 deletions
diff --git a/mm/internal.h b/mm/internal.h
index cb0af847d7d9..c693646e5b3f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -11,6 +11,7 @@
#include <linux/khugepaged.h>
#include <linux/mm.h>
#include <linux/mm_inline.h>
+#include <linux/mmu_notifier.h>
#include <linux/pagemap.h>
#include <linux/pagewalk.h>
#include <linux/rmap.h>
@@ -516,14 +517,30 @@ void free_pgtables(struct mmu_gather *tlb, struct unmap_desc *desc);
void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte);
+/**
+ * sync_with_folio_pmd_zap - sync with concurrent zapping of a folio PMD
+ * @mm: The mm_struct.
+ * @pmdp: Pointer to the pmd that was found to be pmd_none().
+ *
+ * When we find a pmd_none() while unmapping a folio without holding the PTL,
+ * zap_huge_pmd() may have cleared the PMD but not yet modified the folio to
+ * indicate that it's unmapped. Skipping the PMD without synchronization could
+ * make folio unmapping code assume that unmapping failed.
+ *
+ * Wait for concurrent zapping to complete by grabbing the PTL.
+ */
+static inline void sync_with_folio_pmd_zap(struct mm_struct *mm, pmd_t *pmdp)
+{
+ spinlock_t *ptl = pmd_lock(mm, pmdp);
+
+ spin_unlock(ptl);
+}
+
struct zap_details;
-void unmap_page_range(struct mmu_gather *tlb,
- struct vm_area_struct *vma,
- unsigned long addr, unsigned long end,
- struct zap_details *details);
-void zap_page_range_single_batched(struct mmu_gather *tlb,
+void zap_vma_range_batched(struct mmu_gather *tlb,
struct vm_area_struct *vma, unsigned long addr,
unsigned long size, struct zap_details *details);
+int zap_vma_for_reaping(struct vm_area_struct *vma);
int folio_unmap_invalidate(struct address_space *mapping, struct folio *folio,
gfp_t gfp);
@@ -624,6 +641,11 @@ int user_proactive_reclaim(char *buf,
pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address);
/*
+ * in mm/khugepaged.c
+ */
+void set_recommended_min_free_kbytes(void);
+
+/*
* in mm/page_alloc.c
*/
#define K(x) ((x) << (PAGE_SHIFT-10))
@@ -878,13 +900,21 @@ static inline void prep_compound_head(struct page *page, unsigned int order)
INIT_LIST_HEAD(&folio->_deferred_list);
}
-static inline void prep_compound_tail(struct page *head, int tail_idx)
+static inline void prep_compound_tail(struct page *tail,
+ const struct page *head, unsigned int order)
{
- struct page *p = head + tail_idx;
+ tail->mapping = TAIL_MAPPING;
+ set_compound_head(tail, head, order);
+ set_page_private(tail, 0);
+}
- p->mapping = TAIL_MAPPING;
- set_compound_head(p, head);
- set_page_private(p, 0);
+static inline void init_compound_tail(struct page *tail,
+ const struct page *head, unsigned int order, struct zone *zone)
+{
+ atomic_set(&tail->_mapcount, -1);
+ set_page_node(tail, zone_to_nid(zone));
+ set_page_zone(tail, zone_idx(zone));
+ prep_compound_tail(tail, head, order);
}
void post_alloc_hook(struct page *page, unsigned int order, gfp_t gfp_flags);
@@ -929,12 +959,59 @@ void memmap_init_range(unsigned long, int, unsigned long, unsigned long,
unsigned long, enum meminit_context, struct vmem_altmap *, int,
bool);
+/*
+ * mm/sparse.c
+ */
#ifdef CONFIG_SPARSEMEM
void sparse_init(void);
+int sparse_index_init(unsigned long section_nr, int nid);
+
+static inline void sparse_init_one_section(struct mem_section *ms,
+ unsigned long pnum, struct page *mem_map,
+ struct mem_section_usage *usage, unsigned long flags)
+{
+ unsigned long coded_mem_map;
+
+ BUILD_BUG_ON(SECTION_MAP_LAST_BIT > PFN_SECTION_SHIFT);
+
+ /*
+ * We encode the start PFN of the section into the mem_map such that
+ * page_to_pfn() on !CONFIG_SPARSEMEM_VMEMMAP can simply subtract it
+ * from the page pointer to obtain the PFN.
+ */
+ coded_mem_map = (unsigned long)(mem_map - section_nr_to_pfn(pnum));
+ VM_WARN_ON_ONCE(coded_mem_map & ~SECTION_MAP_MASK);
+
+ ms->section_mem_map &= ~SECTION_MAP_MASK;
+ ms->section_mem_map |= coded_mem_map;
+ ms->section_mem_map |= flags | SECTION_HAS_MEM_MAP;
+ ms->usage = usage;
+}
+
+static inline void __section_mark_present(struct mem_section *ms,
+ unsigned long section_nr)
+{
+ if (section_nr > __highest_present_section_nr)
+ __highest_present_section_nr = section_nr;
+
+ ms->section_mem_map |= SECTION_MARKED_PRESENT;
+}
#else
static inline void sparse_init(void) {}
#endif /* CONFIG_SPARSEMEM */
+/*
+ * mm/sparse-vmemmap.c
+ */
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+void sparse_init_subsection_map(unsigned long pfn, unsigned long nr_pages);
+#else
+static inline void sparse_init_subsection_map(unsigned long pfn,
+ unsigned long nr_pages)
+{
+}
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
#if defined CONFIG_COMPACTION || defined CONFIG_CMA
/*
@@ -1218,6 +1295,18 @@ static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
}
return fpin;
}
+
+static inline bool vma_supports_mlock(const struct vm_area_struct *vma)
+{
+ if (vma_test_any_mask(vma, VMA_SPECIAL_FLAGS))
+ return false;
+ if (vma_test_single_mask(vma, VMA_DROPPABLE))
+ return false;
+ if (vma_is_dax(vma) || is_vm_hugetlb_page(vma))
+ return false;
+ return vma != get_gate_vma(current->mm);
+}
+
#else /* !CONFIG_MMU */
static inline void unmap_mapping_folio(struct folio *folio) { }
static inline void mlock_new_folio(struct folio *folio) { }
@@ -1450,6 +1539,8 @@ int __must_check vmap_pages_range_noflush(unsigned long addr, unsigned long end,
}
#endif
+void clear_vm_uninitialized_flag(struct vm_struct *vm);
+
int __must_check __vmap_pages_range_noflush(unsigned long addr,
unsigned long end, pgprot_t prot,
struct page **pages, unsigned int page_shift);
@@ -1748,26 +1839,108 @@ int walk_page_range_debug(struct mm_struct *mm, unsigned long start,
void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm);
int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm);
-void remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long pfn);
-int remap_pfn_range_complete(struct vm_area_struct *vma, unsigned long addr,
- unsigned long pfn, unsigned long size, pgprot_t pgprot);
+int remap_pfn_range_prepare(struct vm_area_desc *desc);
+int remap_pfn_range_complete(struct vm_area_struct *vma,
+ struct mmap_action *action);
+int simple_ioremap_prepare(struct vm_area_desc *desc);
-static inline void io_remap_pfn_range_prepare(struct vm_area_desc *desc,
- unsigned long orig_pfn, unsigned long size)
+static inline int io_remap_pfn_range_prepare(struct vm_area_desc *desc)
{
+ struct mmap_action *action = &desc->action;
+ const unsigned long orig_pfn = action->remap.start_pfn;
+ const pgprot_t orig_pgprot = action->remap.pgprot;
+ const unsigned long size = action->remap.size;
const unsigned long pfn = io_remap_pfn_range_pfn(orig_pfn, size);
+ int err;
+
+ action->remap.start_pfn = pfn;
+ action->remap.pgprot = pgprot_decrypted(orig_pgprot);
+ err = remap_pfn_range_prepare(desc);
+ if (err)
+ return err;
+
+ /* Remap does the actual work. */
+ action->type = MMAP_REMAP_PFN;
+ return 0;
+}
- return remap_pfn_range_prepare(desc, pfn);
+/*
+ * When we succeed an mmap action or just before we unmap a VMA on error, we
+ * need to ensure any rmap lock held is released. On unmap it's required to
+ * avoid a deadlock.
+ */
+static inline void maybe_rmap_unlock_action(struct vm_area_struct *vma,
+ struct mmap_action *action)
+{
+ struct file *file;
+
+ if (!action->hide_from_rmap_until_complete)
+ return;
+
+ VM_WARN_ON_ONCE(vma_is_anonymous(vma));
+ file = vma->vm_file;
+ i_mmap_unlock_write(file->f_mapping);
+ action->hide_from_rmap_until_complete = false;
}
-static inline int io_remap_pfn_range_complete(struct vm_area_struct *vma,
- unsigned long addr, unsigned long orig_pfn, unsigned long size,
- pgprot_t orig_prot)
+#ifdef CONFIG_MMU_NOTIFIER
+static inline bool clear_flush_young_ptes_notify(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep, unsigned int nr)
{
- const unsigned long pfn = io_remap_pfn_range_pfn(orig_pfn, size);
- const pgprot_t prot = pgprot_decrypted(orig_prot);
+ bool young;
- return remap_pfn_range_complete(vma, addr, pfn, size, prot);
+ young = clear_flush_young_ptes(vma, addr, ptep, nr);
+ young |= mmu_notifier_clear_flush_young(vma->vm_mm, addr,
+ addr + nr * PAGE_SIZE);
+ return young;
}
+static inline bool pmdp_clear_flush_young_notify(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp)
+{
+ bool young;
+
+ young = pmdp_clear_flush_young(vma, addr, pmdp);
+ young |= mmu_notifier_clear_flush_young(vma->vm_mm, addr, addr + PMD_SIZE);
+ return young;
+}
+
+static inline bool test_and_clear_young_ptes_notify(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep, unsigned int nr)
+{
+ bool young;
+
+ young = test_and_clear_young_ptes(vma, addr, ptep, nr);
+ young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + nr * PAGE_SIZE);
+ return young;
+}
+
+static inline bool pmdp_test_and_clear_young_notify(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp)
+{
+ bool young;
+
+ young = pmdp_test_and_clear_young(vma, addr, pmdp);
+ young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + PMD_SIZE);
+ return young;
+}
+
+#else /* CONFIG_MMU_NOTIFIER */
+
+#define clear_flush_young_ptes_notify clear_flush_young_ptes
+#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
+#define test_and_clear_young_ptes_notify test_and_clear_young_ptes
+#define pmdp_test_and_clear_young_notify pmdp_test_and_clear_young
+
+#endif /* CONFIG_MMU_NOTIFIER */
+
+extern int sysctl_max_map_count;
+static inline int get_sysctl_max_map_count(void)
+{
+ return READ_ONCE(sysctl_max_map_count);
+}
+
+bool may_expand_vm(struct mm_struct *mm, const vma_flags_t *vma_flags,
+ unsigned long npages);
+
#endif /* __MM_INTERNAL_H */