diff options
Diffstat (limited to 'mm/internal.h')
| -rw-r--r-- | mm/internal.h | 217 |
1 files changed, 195 insertions, 22 deletions
diff --git a/mm/internal.h b/mm/internal.h index cb0af847d7d9..c693646e5b3f 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -11,6 +11,7 @@ #include <linux/khugepaged.h> #include <linux/mm.h> #include <linux/mm_inline.h> +#include <linux/mmu_notifier.h> #include <linux/pagemap.h> #include <linux/pagewalk.h> #include <linux/rmap.h> @@ -516,14 +517,30 @@ void free_pgtables(struct mmu_gather *tlb, struct unmap_desc *desc); void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte); +/** + * sync_with_folio_pmd_zap - sync with concurrent zapping of a folio PMD + * @mm: The mm_struct. + * @pmdp: Pointer to the pmd that was found to be pmd_none(). + * + * When we find a pmd_none() while unmapping a folio without holding the PTL, + * zap_huge_pmd() may have cleared the PMD but not yet modified the folio to + * indicate that it's unmapped. Skipping the PMD without synchronization could + * make folio unmapping code assume that unmapping failed. + * + * Wait for concurrent zapping to complete by grabbing the PTL. + */ +static inline void sync_with_folio_pmd_zap(struct mm_struct *mm, pmd_t *pmdp) +{ + spinlock_t *ptl = pmd_lock(mm, pmdp); + + spin_unlock(ptl); +} + struct zap_details; -void unmap_page_range(struct mmu_gather *tlb, - struct vm_area_struct *vma, - unsigned long addr, unsigned long end, - struct zap_details *details); -void zap_page_range_single_batched(struct mmu_gather *tlb, +void zap_vma_range_batched(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long size, struct zap_details *details); +int zap_vma_for_reaping(struct vm_area_struct *vma); int folio_unmap_invalidate(struct address_space *mapping, struct folio *folio, gfp_t gfp); @@ -624,6 +641,11 @@ int user_proactive_reclaim(char *buf, pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address); /* + * in mm/khugepaged.c + */ +void set_recommended_min_free_kbytes(void); + +/* * in mm/page_alloc.c */ #define K(x) ((x) << (PAGE_SHIFT-10)) @@ -878,13 +900,21 @@ static inline void prep_compound_head(struct page *page, unsigned int order) INIT_LIST_HEAD(&folio->_deferred_list); } -static inline void prep_compound_tail(struct page *head, int tail_idx) +static inline void prep_compound_tail(struct page *tail, + const struct page *head, unsigned int order) { - struct page *p = head + tail_idx; + tail->mapping = TAIL_MAPPING; + set_compound_head(tail, head, order); + set_page_private(tail, 0); +} - p->mapping = TAIL_MAPPING; - set_compound_head(p, head); - set_page_private(p, 0); +static inline void init_compound_tail(struct page *tail, + const struct page *head, unsigned int order, struct zone *zone) +{ + atomic_set(&tail->_mapcount, -1); + set_page_node(tail, zone_to_nid(zone)); + set_page_zone(tail, zone_idx(zone)); + prep_compound_tail(tail, head, order); } void post_alloc_hook(struct page *page, unsigned int order, gfp_t gfp_flags); @@ -929,12 +959,59 @@ void memmap_init_range(unsigned long, int, unsigned long, unsigned long, unsigned long, enum meminit_context, struct vmem_altmap *, int, bool); +/* + * mm/sparse.c + */ #ifdef CONFIG_SPARSEMEM void sparse_init(void); +int sparse_index_init(unsigned long section_nr, int nid); + +static inline void sparse_init_one_section(struct mem_section *ms, + unsigned long pnum, struct page *mem_map, + struct mem_section_usage *usage, unsigned long flags) +{ + unsigned long coded_mem_map; + + BUILD_BUG_ON(SECTION_MAP_LAST_BIT > PFN_SECTION_SHIFT); + + /* + * We encode the start PFN of the section into the mem_map such that + * page_to_pfn() on !CONFIG_SPARSEMEM_VMEMMAP can simply subtract it + * from the page pointer to obtain the PFN. + */ + coded_mem_map = (unsigned long)(mem_map - section_nr_to_pfn(pnum)); + VM_WARN_ON_ONCE(coded_mem_map & ~SECTION_MAP_MASK); + + ms->section_mem_map &= ~SECTION_MAP_MASK; + ms->section_mem_map |= coded_mem_map; + ms->section_mem_map |= flags | SECTION_HAS_MEM_MAP; + ms->usage = usage; +} + +static inline void __section_mark_present(struct mem_section *ms, + unsigned long section_nr) +{ + if (section_nr > __highest_present_section_nr) + __highest_present_section_nr = section_nr; + + ms->section_mem_map |= SECTION_MARKED_PRESENT; +} #else static inline void sparse_init(void) {} #endif /* CONFIG_SPARSEMEM */ +/* + * mm/sparse-vmemmap.c + */ +#ifdef CONFIG_SPARSEMEM_VMEMMAP +void sparse_init_subsection_map(unsigned long pfn, unsigned long nr_pages); +#else +static inline void sparse_init_subsection_map(unsigned long pfn, + unsigned long nr_pages) +{ +} +#endif /* CONFIG_SPARSEMEM_VMEMMAP */ + #if defined CONFIG_COMPACTION || defined CONFIG_CMA /* @@ -1218,6 +1295,18 @@ static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf, } return fpin; } + +static inline bool vma_supports_mlock(const struct vm_area_struct *vma) +{ + if (vma_test_any_mask(vma, VMA_SPECIAL_FLAGS)) + return false; + if (vma_test_single_mask(vma, VMA_DROPPABLE)) + return false; + if (vma_is_dax(vma) || is_vm_hugetlb_page(vma)) + return false; + return vma != get_gate_vma(current->mm); +} + #else /* !CONFIG_MMU */ static inline void unmap_mapping_folio(struct folio *folio) { } static inline void mlock_new_folio(struct folio *folio) { } @@ -1450,6 +1539,8 @@ int __must_check vmap_pages_range_noflush(unsigned long addr, unsigned long end, } #endif +void clear_vm_uninitialized_flag(struct vm_struct *vm); + int __must_check __vmap_pages_range_noflush(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, unsigned int page_shift); @@ -1748,26 +1839,108 @@ int walk_page_range_debug(struct mm_struct *mm, unsigned long start, void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm); int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm); -void remap_pfn_range_prepare(struct vm_area_desc *desc, unsigned long pfn); -int remap_pfn_range_complete(struct vm_area_struct *vma, unsigned long addr, - unsigned long pfn, unsigned long size, pgprot_t pgprot); +int remap_pfn_range_prepare(struct vm_area_desc *desc); +int remap_pfn_range_complete(struct vm_area_struct *vma, + struct mmap_action *action); +int simple_ioremap_prepare(struct vm_area_desc *desc); -static inline void io_remap_pfn_range_prepare(struct vm_area_desc *desc, - unsigned long orig_pfn, unsigned long size) +static inline int io_remap_pfn_range_prepare(struct vm_area_desc *desc) { + struct mmap_action *action = &desc->action; + const unsigned long orig_pfn = action->remap.start_pfn; + const pgprot_t orig_pgprot = action->remap.pgprot; + const unsigned long size = action->remap.size; const unsigned long pfn = io_remap_pfn_range_pfn(orig_pfn, size); + int err; + + action->remap.start_pfn = pfn; + action->remap.pgprot = pgprot_decrypted(orig_pgprot); + err = remap_pfn_range_prepare(desc); + if (err) + return err; + + /* Remap does the actual work. */ + action->type = MMAP_REMAP_PFN; + return 0; +} - return remap_pfn_range_prepare(desc, pfn); +/* + * When we succeed an mmap action or just before we unmap a VMA on error, we + * need to ensure any rmap lock held is released. On unmap it's required to + * avoid a deadlock. + */ +static inline void maybe_rmap_unlock_action(struct vm_area_struct *vma, + struct mmap_action *action) +{ + struct file *file; + + if (!action->hide_from_rmap_until_complete) + return; + + VM_WARN_ON_ONCE(vma_is_anonymous(vma)); + file = vma->vm_file; + i_mmap_unlock_write(file->f_mapping); + action->hide_from_rmap_until_complete = false; } -static inline int io_remap_pfn_range_complete(struct vm_area_struct *vma, - unsigned long addr, unsigned long orig_pfn, unsigned long size, - pgprot_t orig_prot) +#ifdef CONFIG_MMU_NOTIFIER +static inline bool clear_flush_young_ptes_notify(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, unsigned int nr) { - const unsigned long pfn = io_remap_pfn_range_pfn(orig_pfn, size); - const pgprot_t prot = pgprot_decrypted(orig_prot); + bool young; - return remap_pfn_range_complete(vma, addr, pfn, size, prot); + young = clear_flush_young_ptes(vma, addr, ptep, nr); + young |= mmu_notifier_clear_flush_young(vma->vm_mm, addr, + addr + nr * PAGE_SIZE); + return young; } +static inline bool pmdp_clear_flush_young_notify(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp) +{ + bool young; + + young = pmdp_clear_flush_young(vma, addr, pmdp); + young |= mmu_notifier_clear_flush_young(vma->vm_mm, addr, addr + PMD_SIZE); + return young; +} + +static inline bool test_and_clear_young_ptes_notify(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, unsigned int nr) +{ + bool young; + + young = test_and_clear_young_ptes(vma, addr, ptep, nr); + young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + nr * PAGE_SIZE); + return young; +} + +static inline bool pmdp_test_and_clear_young_notify(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp) +{ + bool young; + + young = pmdp_test_and_clear_young(vma, addr, pmdp); + young |= mmu_notifier_clear_young(vma->vm_mm, addr, addr + PMD_SIZE); + return young; +} + +#else /* CONFIG_MMU_NOTIFIER */ + +#define clear_flush_young_ptes_notify clear_flush_young_ptes +#define pmdp_clear_flush_young_notify pmdp_clear_flush_young +#define test_and_clear_young_ptes_notify test_and_clear_young_ptes +#define pmdp_test_and_clear_young_notify pmdp_test_and_clear_young + +#endif /* CONFIG_MMU_NOTIFIER */ + +extern int sysctl_max_map_count; +static inline int get_sysctl_max_map_count(void) +{ + return READ_ONCE(sysctl_max_map_count); +} + +bool may_expand_vm(struct mm_struct *mm, const vma_flags_t *vma_flags, + unsigned long npages); + #endif /* __MM_INTERNAL_H */ |
