diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/filemap.c | 25 | ||||
-rw-r--r-- | mm/gup.c | 13 | ||||
-rw-r--r-- | mm/memcontrol.c | 12 | ||||
-rw-r--r-- | mm/memory.c | 27 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 2 | ||||
-rw-r--r-- | mm/mmap.c | 2 | ||||
-rw-r--r-- | mm/page_alloc.c | 7 | ||||
-rw-r--r-- | mm/shmem.c | 11 | ||||
-rw-r--r-- | mm/slub.c | 4 | ||||
-rw-r--r-- | mm/sparse.c | 2 | ||||
-rw-r--r-- | mm/truncate.c | 4 | ||||
-rw-r--r-- | mm/userfaultfd.c | 29 | ||||
-rw-r--r-- | mm/vmalloc.c | 7 | ||||
-rw-r--r-- | mm/vmscan.c | 24 |
14 files changed, 103 insertions, 66 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index d62150418b91..60a9cc593e9b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2712,14 +2712,12 @@ int kiocb_write_and_wait(struct kiocb *iocb, size_t count) } EXPORT_SYMBOL_GPL(kiocb_write_and_wait); -int kiocb_invalidate_pages(struct kiocb *iocb, size_t count) +int filemap_invalidate_pages(struct address_space *mapping, + loff_t pos, loff_t end, bool nowait) { - struct address_space *mapping = iocb->ki_filp->f_mapping; - loff_t pos = iocb->ki_pos; - loff_t end = pos + count - 1; int ret; - if (iocb->ki_flags & IOCB_NOWAIT) { + if (nowait) { /* we could block if there are any pages in the range */ if (filemap_range_has_page(mapping, pos, end)) return -EAGAIN; @@ -2738,6 +2736,15 @@ int kiocb_invalidate_pages(struct kiocb *iocb, size_t count) return invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT, end >> PAGE_SHIFT); } + +int kiocb_invalidate_pages(struct kiocb *iocb, size_t count) +{ + struct address_space *mapping = iocb->ki_filp->f_mapping; + + return filemap_invalidate_pages(mapping, iocb->ki_pos, + iocb->ki_pos + count - 1, + iocb->ki_flags & IOCB_NOWAIT); +} EXPORT_SYMBOL_GPL(kiocb_invalidate_pages); /** @@ -3987,7 +3994,6 @@ ssize_t generic_perform_write(struct kiocb *iocb, struct iov_iter *i) ssize_t written = 0; do { - struct page *page; struct folio *folio; size_t offset; /* Offset into folio */ size_t bytes; /* Bytes to write to folio */ @@ -4017,11 +4023,10 @@ retry: } status = a_ops->write_begin(file, mapping, pos, bytes, - &page, &fsdata); + &folio, &fsdata); if (unlikely(status < 0)) break; - folio = page_folio(page); offset = offset_in_folio(folio, pos); if (bytes > folio_size(folio) - offset) bytes = folio_size(folio) - offset; @@ -4033,7 +4038,7 @@ retry: flush_dcache_folio(folio); status = a_ops->write_end(file, mapping, pos, bytes, copied, - page, fsdata); + folio, fsdata); if (unlikely(status != copied)) { iov_iter_revert(i, copied - max(status, 0L)); if (unlikely(status < 0)) @@ -4231,7 +4236,7 @@ int filemap_invalidate_inode(struct inode *inode, bool flush, } /* Wait for writeback to complete on all folios and discard. */ - truncate_inode_pages_range(mapping, start, end); + invalidate_inode_pages2_range(mapping, start / PAGE_SIZE, end / PAGE_SIZE); unlock: filemap_invalidate_unlock(mapping); @@ -416,6 +416,19 @@ void unpin_user_pages(struct page **pages, unsigned long npages) EXPORT_SYMBOL(unpin_user_pages); /** + * unpin_user_folio() - release pages of a folio + * @folio: pointer to folio to be released + * @npages: number of pages of same folio + * + * Release npages of the folio + */ +void unpin_user_folio(struct folio *folio, unsigned long npages) +{ + gup_put_folio(folio, npages, FOLL_PIN); +} +EXPORT_SYMBOL(unpin_user_folio); + +/** * unpin_folios() - release an array of gup-pinned folios. * @folios: array of folios to be marked dirty and released. * @nfolios: number of folios in the @folios array. diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f29157288b7d..d563fb515766 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3613,8 +3613,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) memcg1_soft_limit_reset(memcg); #ifdef CONFIG_ZSWAP memcg->zswap_max = PAGE_COUNTER_MAX; - WRITE_ONCE(memcg->zswap_writeback, - !parent || READ_ONCE(parent->zswap_writeback)); + WRITE_ONCE(memcg->zswap_writeback, true); #endif page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX); if (parent) { @@ -5320,7 +5319,14 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size) bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg) { /* if zswap is disabled, do not block pages going to the swapping device */ - return !zswap_is_enabled() || !memcg || READ_ONCE(memcg->zswap_writeback); + if (!zswap_is_enabled()) + return true; + + for (; memcg; memcg = parent_mem_cgroup(memcg)) + if (!READ_ONCE(memcg->zswap_writeback)) + return false; + + return true; } static u64 zswap_current_read(struct cgroup_subsys_state *css, diff --git a/mm/memory.c b/mm/memory.c index 3c01d68065be..ebfc9768f801 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2632,11 +2632,7 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd, return 0; } -/* - * Variant of remap_pfn_range that does not call track_pfn_remap. The caller - * must have pre-validated the caching bits of the pgprot_t. - */ -int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr, +static int remap_pfn_range_internal(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t prot) { pgd_t *pgd; @@ -2689,6 +2685,27 @@ int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr, return 0; } +/* + * Variant of remap_pfn_range that does not call track_pfn_remap. The caller + * must have pre-validated the caching bits of the pgprot_t. + */ +int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr, + unsigned long pfn, unsigned long size, pgprot_t prot) +{ + int error = remap_pfn_range_internal(vma, addr, pfn, size, prot); + + if (!error) + return 0; + + /* + * A partial pfn range mapping is dangerous: it does not + * maintain page reference counts, and callers may free + * pages due to the error. So zap it early. + */ + zap_page_range_single(vma, addr, size, NULL); + return error; +} + /** * remap_pfn_range - remap kernel memory to userspace * @vma: user vma to map to diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 66267c26ca1b..951878ab627a 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1681,7 +1681,7 @@ struct range __weak arch_get_mappable_range(void) struct range mhp_get_pluggable_range(bool need_mapping) { - const u64 max_phys = (1ULL << MAX_PHYSMEM_BITS) - 1; + const u64 max_phys = PHYSMEM_END; struct range mhp_range; if (need_mapping) { diff --git a/mm/mmap.c b/mm/mmap.c index d0dfc85b209b..6ddb278a5ee8 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1229,7 +1229,7 @@ static inline u64 file_mmap_size_max(struct file *file, struct inode *inode) return MAX_LFS_FILESIZE; /* Special "we do even unsigned file positions" case */ - if (file->f_mode & FMODE_UNSIGNED_OFFSET) + if (file->f_op->fop_flags & FOP_UNSIGNED_OFFSET) return 0; /* Yes, random drivers might want more. But I'm tired of buggy drivers */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c565de8f48e9..91ace8ca97e2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1054,6 +1054,13 @@ __always_inline bool free_pages_prepare(struct page *page, reset_page_owner(page, order); page_table_check_free(page, order); pgalloc_tag_sub(page, 1 << order); + + /* + * The page is isolated and accounted for. + * Mark the codetag as empty to avoid accounting error + * when the page is freed by unpoison_memory(). + */ + clear_page_tag_ref(page); return false; } diff --git a/mm/shmem.c b/mm/shmem.c index 5a77acf6ac6a..b875852df51f 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2878,7 +2878,7 @@ static const struct inode_operations shmem_short_symlink_operations; static int shmem_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, - struct page **pagep, void **fsdata) + struct folio **foliop, void **fsdata) { struct inode *inode = mapping->host; struct shmem_inode_info *info = SHMEM_I(inode); @@ -2899,23 +2899,22 @@ shmem_write_begin(struct file *file, struct address_space *mapping, if (ret) return ret; - *pagep = folio_file_page(folio, index); - if (PageHWPoison(*pagep)) { + if (folio_test_hwpoison(folio) || + (folio_test_large(folio) && folio_test_has_hwpoisoned(folio))) { folio_unlock(folio); folio_put(folio); - *pagep = NULL; return -EIO; } + *foliop = folio; return 0; } static int shmem_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + struct folio *folio, void *fsdata) { - struct folio *folio = page_folio(page); struct inode *inode = mapping->host; if (pos + copied > inode->i_size) diff --git a/mm/slub.c b/mm/slub.c index 3d454888e921..21f71cb6cc06 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2109,6 +2109,10 @@ alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p, if (!mem_alloc_profiling_enabled()) return; + /* slab->obj_exts might not be NULL if it was created for MEMCG accounting. */ + if (s->flags & (SLAB_NO_OBJ_EXT | SLAB_NOLEAKTRACE)) + return; + obj_exts = slab_obj_exts(slab); if (!obj_exts) return; diff --git a/mm/sparse.c b/mm/sparse.c index 0f018c6f9ec5..dc38539f8560 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -129,7 +129,7 @@ static inline int sparse_early_nid(struct mem_section *section) static void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn, unsigned long *end_pfn) { - unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT); + unsigned long max_sparsemem_pfn = (PHYSMEM_END + 1) >> PAGE_SHIFT; /* * Sanity checks - do not allow an architecture to pass diff --git a/mm/truncate.c b/mm/truncate.c index 4d61fbdd4b2f..0668cd340a46 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -157,7 +157,7 @@ static void truncate_cleanup_folio(struct folio *folio) if (folio_mapped(folio)) unmap_mapping_folio(folio); - if (folio_has_private(folio)) + if (folio_needs_release(folio)) folio_invalidate(folio, 0, folio_size(folio)); /* @@ -219,7 +219,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) if (!mapping_inaccessible(folio->mapping)) folio_zero_range(folio, offset, length); - if (folio_has_private(folio)) + if (folio_needs_release(folio)) folio_invalidate(folio, offset, length); if (!folio_test_large(folio)) return true; diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index e54e5c8907fa..acc56c75ba99 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -787,27 +787,30 @@ retry: } dst_pmdval = pmdp_get_lockless(dst_pmd); - /* - * If the dst_pmd is mapped as THP don't - * override it and just be strict. - */ - if (unlikely(pmd_trans_huge(dst_pmdval))) { - err = -EEXIST; - break; - } if (unlikely(pmd_none(dst_pmdval)) && unlikely(__pte_alloc(dst_mm, dst_pmd))) { err = -ENOMEM; break; } - /* If an huge pmd materialized from under us fail */ - if (unlikely(pmd_trans_huge(*dst_pmd))) { + dst_pmdval = pmdp_get_lockless(dst_pmd); + /* + * If the dst_pmd is THP don't override it and just be strict. + * (This includes the case where the PMD used to be THP and + * changed back to none after __pte_alloc().) + */ + if (unlikely(!pmd_present(dst_pmdval) || pmd_trans_huge(dst_pmdval) || + pmd_devmap(dst_pmdval))) { + err = -EEXIST; + break; + } + if (unlikely(pmd_bad(dst_pmdval))) { err = -EFAULT; break; } - - BUG_ON(pmd_none(*dst_pmd)); - BUG_ON(pmd_trans_huge(*dst_pmd)); + /* + * For shmem mappings, khugepaged is allowed to remove page + * tables under us; pte_offset_map_lock() will deal with that. + */ err = mfill_atomic_pte(dst_pmd, dst_vma, dst_addr, src_addr, flags, &folio); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index af2de36549d6..a0df1e2e155a 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2191,6 +2191,7 @@ static void purge_vmap_node(struct work_struct *work) { struct vmap_node *vn = container_of(work, struct vmap_node, purge_work); + unsigned long nr_purged_pages = 0; struct vmap_area *va, *n_va; LIST_HEAD(local_list); @@ -2208,7 +2209,7 @@ static void purge_vmap_node(struct work_struct *work) kasan_release_vmalloc(orig_start, orig_end, va->va_start, va->va_end); - atomic_long_sub(nr, &vmap_lazy_nr); + nr_purged_pages += nr; vn->nr_purged++; if (is_vn_id_valid(vn_id) && !vn->skip_populate) @@ -2219,6 +2220,8 @@ static void purge_vmap_node(struct work_struct *work) list_add(&va->list, &local_list); } + atomic_long_sub(nr_purged_pages, &vmap_lazy_nr); + reclaim_list_global(&local_list); } @@ -2626,6 +2629,7 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) vb->dirty_max = 0; bitmap_set(vb->used_map, 0, (1UL << order)); INIT_LIST_HEAD(&vb->free_list); + vb->cpu = raw_smp_processor_id(); xa = addr_to_vb_xa(va->va_start); vb_idx = addr_to_vb_idx(va->va_start); @@ -2642,7 +2646,6 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) * integrity together with list_for_each_rcu from read * side. */ - vb->cpu = raw_smp_processor_id(); vbq = per_cpu_ptr(&vmap_block_queue, vb->cpu); spin_lock(&vbq->lock); list_add_tail_rcu(&vb->free_list, &vbq->free); diff --git a/mm/vmscan.c b/mm/vmscan.c index cfa839284b92..bd489c1af228 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1604,25 +1604,6 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec, } -#ifdef CONFIG_CMA -/* - * It is waste of effort to scan and reclaim CMA pages if it is not available - * for current allocation context. Kswapd can not be enrolled as it can not - * distinguish this scenario by using sc->gfp_mask = GFP_KERNEL - */ -static bool skip_cma(struct folio *folio, struct scan_control *sc) -{ - return !current_is_kswapd() && - gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE && - folio_migratetype(folio) == MIGRATE_CMA; -} -#else -static bool skip_cma(struct folio *folio, struct scan_control *sc) -{ - return false; -} -#endif - /* * Isolating page from the lruvec to fill in @dst list by nr_to_scan times. * @@ -1669,8 +1650,7 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan, nr_pages = folio_nr_pages(folio); total_scan += nr_pages; - if (folio_zonenum(folio) > sc->reclaim_idx || - skip_cma(folio, sc)) { + if (folio_zonenum(folio) > sc->reclaim_idx) { nr_skipped[folio_zonenum(folio)] += nr_pages; move_to = &folios_skipped; goto move; @@ -4320,7 +4300,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c } /* ineligible */ - if (zone > sc->reclaim_idx || skip_cma(folio, sc)) { + if (zone > sc->reclaim_idx) { gen = folio_inc_gen(lruvec, folio, false); list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]); return true; |