Diffstat (limited to 'mm')
-rw-r--r--	mm/Kconfig		13
-rw-r--r--	mm/filemap.c		26
-rw-r--r--	mm/huge_memory.c	9
-rw-r--r--	mm/hugetlb.c		1
-rw-r--r--	mm/hugetlb_cgroup.c	8
-rw-r--r--	mm/kasan/quarantine.c	39
-rw-r--r--	mm/list_lru.c		10
-rw-r--r--	mm/madvise.c		7
-rw-r--r--	mm/memcontrol.c		9
-rw-r--r--	mm/memory_hotplug.c	18
-rw-r--r--	mm/mmap.c		26
-rw-r--r--	mm/page-writeback.c	6
-rw-r--r--	mm/page_alloc.c		5
-rw-r--r--	mm/slab.h		40
-rw-r--r--	mm/swapfile.c		4
-rw-r--r--	mm/zsmalloc.c		54
16 files changed, 127 insertions, 148 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index d42423f884a7..390165ffbb0f 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -707,19 +707,6 @@ config ZSMALLOC
 	  returned by an alloc(). This handle must be mapped in order to
 	  access the allocated space.
 
-config ZSMALLOC_PGTABLE_MAPPING
-	bool "Use page table mapping to access object in zsmalloc"
-	depends on ZSMALLOC=y
-	help
-	  By default, zsmalloc uses a copy-based object mapping method to
-	  access allocations that span two pages. However, if a particular
-	  architecture (ex, ARM) performs VM mapping faster than copying,
-	  then you should select this. This causes zsmalloc to use page table
-	  mapping rather than copying for object mapping.
-
-	  You can check speed with zsmalloc benchmark:
-	  https://github.com/spartacus06/zsmapbench
-
 config ZSMALLOC_STAT
 	bool "Export zsmalloc statistics"
 	depends on ZSMALLOC
diff --git a/mm/filemap.c b/mm/filemap.c
index d5e7c2029d16..0b2067b3c328 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1484,11 +1484,19 @@ void end_page_writeback(struct page *page)
 		rotate_reclaimable_page(page);
 	}
 
+	/*
+	 * Writeback does not hold a page reference of its own, relying
+	 * on truncation to wait for the clearing of PG_writeback. But
+	 * here we must make sure that the page is not freed and reused
+	 * before the wake_up_page().
+	 */
+	get_page(page);
 	if (!test_clear_page_writeback(page))
 		BUG();
 
 	smp_mb__after_atomic();
 	wake_up_page(page, PG_writeback);
+	put_page(page);
 }
 EXPORT_SYMBOL(end_page_writeback);
 
@@ -2347,10 +2355,15 @@ page_ok:
 
 page_not_up_to_date:
 		/* Get exclusive access to the page ... */
-		if (iocb->ki_flags & IOCB_WAITQ)
+		if (iocb->ki_flags & IOCB_WAITQ) {
+			if (written) {
+				put_page(page);
+				goto out;
+			}
 			error = lock_page_async(page, iocb->ki_waitq);
-		else
+		} else {
 			error = lock_page_killable(page);
+		}
 		if (unlikely(error))
 			goto readpage_error;
 
@@ -2393,10 +2406,15 @@ readpage:
 		}
 
 		if (!PageUptodate(page)) {
-			if (iocb->ki_flags & IOCB_WAITQ)
+			if (iocb->ki_flags & IOCB_WAITQ) {
+				if (written) {
+					put_page(page);
+					goto out;
+				}
 				error = lock_page_async(page, iocb->ki_waitq);
-			else
+			} else {
 				error = lock_page_killable(page);
+			}
 
 			if (unlikely(error))
 				goto readpage_error;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9474dbc150ed..ec2bb93f7431 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -710,7 +710,6 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 			transparent_hugepage_use_zero_page()) {
 		pgtable_t pgtable;
 		struct page *zero_page;
-		bool set;
 		vm_fault_t ret;
 		pgtable = pte_alloc_one(vma->vm_mm);
 		if (unlikely(!pgtable))
@@ -723,25 +722,25 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 		}
 		vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
 		ret = 0;
-		set = false;
 		if (pmd_none(*vmf->pmd)) {
 			ret = check_stable_address_space(vma->vm_mm);
 			if (ret) {
 				spin_unlock(vmf->ptl);
+				pte_free(vma->vm_mm, pgtable);
 			} else if (userfaultfd_missing(vma)) {
 				spin_unlock(vmf->ptl);
+				pte_free(vma->vm_mm, pgtable);
 				ret = handle_userfault(vmf, VM_UFFD_MISSING);
 				VM_BUG_ON(ret & VM_FAULT_FALLBACK);
 			} else {
 				set_huge_zero_page(pgtable, vma->vm_mm, vma,
 						   haddr, vmf->pmd, zero_page);
 				spin_unlock(vmf->ptl);
-				set = true;
 			}
-		} else
+		} else {
 			spin_unlock(vmf->ptl);
-		if (!set)
 			pte_free(vma->vm_mm, pgtable);
+		}
 		return ret;
 	}
 	gfp = alloc_hugepage_direct_gfpmask(vma);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 37f15c3c24dc..d029d938d26d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1216,6 +1216,7 @@ static void destroy_compound_gigantic_page(struct page *page,
 	}
 
 	set_compound_order(page, 0);
+	page[1].compound_nr = 0;
 	__ClearPageHead(page);
 }
 
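The end_page_writeback() change above closes a narrow use-after-free window: nothing pins the page between clearing PG_writeback and wake_up_page(), so a waiter (truncation) can free and reuse the page before the wakeup touches it. Below is a minimal userspace sketch of that pin-across-wakeup pattern; every name in it is illustrative, not kernel API.

/* Take a reference before clearing the flag a waiter may be blocked on,
 * so the object cannot be freed and reused before the wakeup. */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
	atomic_int refcount;
	atomic_bool busy;		/* stands in for PG_writeback */
};

static void obj_get(struct obj *o)
{
	atomic_fetch_add(&o->refcount, 1);
}

static void obj_put(struct obj *o)
{
	if (atomic_fetch_sub(&o->refcount, 1) == 1)
		free(o);		/* last reference: the object really goes away */
}

static void wake_waiters(struct obj *o)
{
	printf("waking waiters on %p\n", (void *)o);	/* like wake_up_page() */
}

static void end_busy(struct obj *o)
{
	obj_get(o);			/* pin: a waiter may drop its ref as soon as busy clears */
	atomic_store(&o->busy, 0);
	wake_waiters(o);		/* safe: our pin keeps o alive */
	obj_put(o);
}

int main(void)
{
	struct obj *o = malloc(sizeof(*o));

	atomic_init(&o->refcount, 1);
	atomic_init(&o->busy, 1);
	end_busy(o);
	obj_put(o);			/* drop the initial reference */
	return 0;
}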
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 1f87aec9ab5c..9182848dda3e 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -82,11 +82,8 @@ static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
 
 	for (idx = 0; idx < hugetlb_max_hstate; idx++) {
 		if (page_counter_read(
-			    hugetlb_cgroup_counter_from_cgroup(h_cg, idx)) ||
-		    page_counter_read(hugetlb_cgroup_counter_from_cgroup_rsvd(
-			    h_cg, idx))) {
+			    hugetlb_cgroup_counter_from_cgroup(h_cg, idx)))
 			return true;
-		}
 	}
 	return false;
 }
@@ -202,9 +199,10 @@ static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
 	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
 	struct hstate *h;
 	struct page *page;
-	int idx = 0;
+	int idx;
 
 	do {
+		idx = 0;
 		for_each_hstate(h) {
 			spin_lock(&hugetlb_lock);
 			list_for_each_entry(page, &h->hugepage_activelist, lru)
diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
index 4c5375810449..0e3f8494628f 100644
--- a/mm/kasan/quarantine.c
+++ b/mm/kasan/quarantine.c
@@ -29,6 +29,7 @@
 #include <linux/srcu.h>
 #include <linux/string.h>
 #include <linux/types.h>
+#include <linux/cpuhotplug.h>
 
 #include "../slab.h"
 #include "kasan.h"
@@ -43,6 +44,7 @@ struct qlist_head {
 	struct qlist_node *head;
 	struct qlist_node *tail;
 	size_t bytes;
+	bool offline;
 };
 
 #define QLIST_INIT { NULL, NULL, 0 }
@@ -188,6 +190,10 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
 	local_irq_save(flags);
 
 	q = this_cpu_ptr(&cpu_quarantine);
+	if (q->offline) {
+		local_irq_restore(flags);
+		return;
+	}
 	qlist_put(q, &info->quarantine_link, cache->size);
 	if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) {
 		qlist_move_all(q, &temp);
@@ -328,3 +334,36 @@ void quarantine_remove_cache(struct kmem_cache *cache)
 
 	synchronize_srcu(&remove_cache_srcu);
 }
+
+static int kasan_cpu_online(unsigned int cpu)
+{
+	this_cpu_ptr(&cpu_quarantine)->offline = false;
+	return 0;
+}
+
+static int kasan_cpu_offline(unsigned int cpu)
+{
+	struct qlist_head *q;
+
+	q = this_cpu_ptr(&cpu_quarantine);
+	/* Ensure the ordering between the writing to q->offline and
+	 * qlist_free_all. Otherwise, cpu_quarantine may be corrupted
+	 * by interrupt.
+	 */
+	WRITE_ONCE(q->offline, true);
+	barrier();
+	qlist_free_all(q, NULL);
+	return 0;
+}
+
+static int __init kasan_cpu_quarantine_init(void)
+{
+	int ret = 0;
+
+	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mm/kasan:online",
+				kasan_cpu_online, kasan_cpu_offline);
+	if (ret < 0)
+		pr_err("kasan cpu quarantine register failed [%d]\n", ret);
+	return ret;
+}
+late_initcall(kasan_cpu_quarantine_init);
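The new kasan_cpu_offline() callback above depends on ordering: the offline flag must be published before the drain starts, so that an interrupt arriving mid-drain sees it in quarantine_put() and bails out instead of enqueueing into a list that is being freed. A userspace sketch of the same publish-then-drain shape follows, with illustrative names rather than the kernel's qlist API.

#include <stdlib.h>

struct node {
	struct node *next;
};

static struct node *head;
static volatile int draining;		/* stands in for q->offline */

static void enqueue(void)
{
	struct node *n;

	if (draining)			/* mirrors the q->offline check in quarantine_put() */
		return;
	n = malloc(sizeof(*n));
	if (!n)
		return;
	n->next = head;
	head = n;
}

static void drain(void)
{
	draining = 1;			/* must be visible before we start freeing */
	__asm__ volatile("" ::: "memory");	/* compiler barrier, like barrier() */
	while (head) {
		struct node *n = head;

		head = n->next;
		free(n);
	}
}

int main(void)
{
	enqueue();
	enqueue();
	drain();
	enqueue();			/* refused: draining is already set */
	return 0;
}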
diff --git a/mm/list_lru.c b/mm/list_lru.c
index 5aa6e44bc2ae..fe230081690b 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -534,7 +534,6 @@ static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
 	struct list_lru_node *nlru = &lru->node[nid];
 	int dst_idx = dst_memcg->kmemcg_id;
 	struct list_lru_one *src, *dst;
-	bool set;
 
 	/*
 	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
@@ -546,11 +545,12 @@ static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
 	dst = list_lru_from_memcg_idx(nlru, dst_idx);
 
 	list_splice_init(&src->list, &dst->list);
-	set = (!dst->nr_items && src->nr_items);
-	dst->nr_items += src->nr_items;
-	if (set)
+
+	if (src->nr_items) {
+		dst->nr_items += src->nr_items;
 		memcg_set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
-	src->nr_items = 0;
+		src->nr_items = 0;
+	}
 
 	spin_unlock_irq(&nlru->lock);
 }
diff --git a/mm/madvise.c b/mm/madvise.c
index 416a56b8e757..13f5677b9322 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -226,7 +226,7 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma,
 		struct address_space *mapping)
 {
 	XA_STATE(xas, &mapping->i_pages, linear_page_index(vma, start));
-	pgoff_t end_index = end / PAGE_SIZE;
+	pgoff_t end_index = linear_page_index(vma, end + PAGE_SIZE - 1);
 	struct page *page;
 
 	rcu_read_lock();
@@ -1204,8 +1204,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
 		goto put_pid;
 	}
 
-	if (task->mm != current->mm &&
-	    !process_madvise_behavior_valid(behavior)) {
+	if (!process_madvise_behavior_valid(behavior)) {
 		ret = -EINVAL;
 		goto release_task;
 	}
@@ -1231,8 +1230,6 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
 	ret = total_len - iov_iter_count(&iter);
 
 	mmput(mm);
-	return ret;
-
 release_task:
 	put_task_struct(task);
 put_pid:
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3dcbf24d2227..29459a6ce1c7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -867,8 +867,13 @@ void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val)
 	rcu_read_lock();
 	memcg = mem_cgroup_from_obj(p);
 
-	/* Untracked pages have no memcg, no lruvec. Update only the node */
-	if (!memcg || memcg == root_mem_cgroup) {
+	/*
+	 * Untracked pages have no memcg, no lruvec. Update only the
+	 * node. If we reparent the slab objects to the root memcg,
+	 * when we free the slab object, we need to update the per-memcg
+	 * vmstats to keep it correct for the root memcg.
+	 */
+	if (!memcg) {
 		__mod_node_page_state(pgdat, idx, val);
 	} else {
 		lruvec = mem_cgroup_lruvec(memcg, pgdat);
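The madvise.c hunk above fixes an index calculation: end is a userspace virtual address, not a file offset, so end / PAGE_SIZE lands on the wrong page-cache slot whenever the vma does not map the file from offset 0. A simplified sketch of the conversion linear_page_index() performs (kernel types and the huge-page case omitted):

#include <stdio.h>

#define PAGE_SHIFT 12

static unsigned long sketch_linear_page_index(unsigned long vm_start,
					      unsigned long vm_pgoff,
					      unsigned long address)
{
	/* page offset of `address` within the vma, plus the vma's own
	 * offset into the file, gives the page-cache index */
	return ((address - vm_start) >> PAGE_SHIFT) + vm_pgoff;
}

int main(void)
{
	/* vma mapped at 0x7f0000000000, starting at page 16 of the file */
	unsigned long idx = sketch_linear_page_index(0x7f0000000000UL, 16,
						     0x7f0000003000UL);

	printf("page-cache index: %lu\n", idx);	/* prints 19, not 3 */
	return 0;
}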
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b44d4c7ba73b..63b2e46b6555 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -350,24 +350,6 @@ int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages,
 	return err;
 }
 
-#ifdef CONFIG_NUMA
-int __weak memory_add_physaddr_to_nid(u64 start)
-{
-	pr_info_once("Unknown online node for memory at 0x%llx, assuming node 0\n",
-			start);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
-
-int __weak phys_to_target_node(u64 start)
-{
-	pr_info_once("Unknown target node for memory at 0x%llx, assuming node 0\n",
-			start);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(phys_to_target_node);
-#endif
-
 /* find the smallest valid pfn in the range [start_pfn, end_pfn) */
 static unsigned long find_smallest_section_pfn(int nid, struct zone *zone,
 				     unsigned long start_pfn,
diff --git a/mm/mmap.c b/mm/mmap.c
index d91ecb00d38c..5c8b4485860d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1808,6 +1808,17 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		if (error)
 			goto unmap_and_free_vma;
 
+		/* Can addr have changed??
+		 *
+		 * Answer: Yes, several device drivers can do it in their
+		 *         f_op->mmap method. -DaveM
+		 * Bug: If addr is changed, prev, rb_link, rb_parent should
+		 *      be updated for vma_link()
+		 */
+		WARN_ON_ONCE(addr != vma->vm_start);
+
+		addr = vma->vm_start;
+
 		/* If vm_flags changed after call_mmap(), we should try merge vma again
 		 * as we may succeed this time.
 		 */
@@ -1822,25 +1833,12 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 				fput(vma->vm_file);
 				vm_area_free(vma);
 				vma = merge;
-				/* Update vm_flags and possible addr to pick up the change. We don't
-				 * warn here if addr changed as the vma is not linked by vma_link().
-				 */
-				addr = vma->vm_start;
+				/* Update vm_flags to pick up the change. */
 				vm_flags = vma->vm_flags;
 				goto unmap_writable;
 			}
 		}
 
-		/* Can addr have changed??
-		 *
-		 * Answer: Yes, several device drivers can do it in their
-		 *         f_op->mmap method. -DaveM
-		 * Bug: If addr is changed, prev, rb_link, rb_parent should
-		 *      be updated for vma_link()
-		 */
-		WARN_ON_ONCE(addr != vma->vm_start);
-
-		addr = vma->vm_start;
 		vm_flags = vma->vm_flags;
 	} else if (vm_flags & VM_SHARED) {
 		error = shmem_zero_setup(vma);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 7709f0e223f5..586042472ac9 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2754,12 +2754,6 @@ int test_clear_page_writeback(struct page *page)
 	} else {
 		ret = TestClearPageWriteback(page);
 	}
-	/*
-	 * NOTE: Page might be free now! Writeback doesn't hold a page
-	 * reference on its own, it relies on truncation to wait for
-	 * the clearing of PG_writeback. The below can only access
-	 * page state that is static across allocation cycles.
-	 */
 	if (ret) {
 		dec_lruvec_state(lruvec, NR_WRITEBACK);
 		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
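The mmap.c reordering above is about object lifetime: once the merge succeeds, the original vma has been freed and replaced, so the addr-versus-vm_start sanity check (and the addr update) must run before the merge attempt. A generic userspace sketch of that hazard, with illustrative types and names:

#include <stdlib.h>

struct area {
	unsigned long start;
};

static struct area *try_merge(struct area *a)
{
	/* on success the old area is freed and a bigger one is returned */
	struct area *merged = malloc(sizeof(*merged));

	if (!merged)
		return NULL;
	merged->start = a->start;
	free(a);
	return merged;
}

static unsigned long finish(struct area *a, unsigned long addr)
{
	struct area *merged;

	/* validate against the area we created, *before* it can be freed */
	if (addr != a->start)
		addr = a->start;	/* a callback moved the mapping; follow it */

	merged = try_merge(a);
	if (merged)
		a = merged;	/* old `a` is gone; touching it now would be use-after-free */

	free(a);
	return addr;
}

int main(void)
{
	struct area *a = malloc(sizeof(*a));

	if (!a)
		return 1;
	a->start = 0x1000;
	return finish(a, 0x2000) == 0x1000 ? 0 : 1;
}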
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 23f5066bd4a5..eaa227a479e4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5103,6 +5103,11 @@ refill:
 		if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
 			goto refill;
 
+		if (unlikely(nc->pfmemalloc)) {
+			free_the_page(page, compound_order(page));
+			goto refill;
+		}
+
 #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
 		/* if size can vary use size else just use PAGE_SIZE */
 		size = nc->size;
diff --git a/mm/slab.h b/mm/slab.h
index 6d7c6a5056ba..f9977d6613d6 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -274,22 +274,32 @@ static inline size_t obj_full_size(struct kmem_cache *s)
 	return s->size + sizeof(struct obj_cgroup *);
 }
 
-static inline struct obj_cgroup *memcg_slab_pre_alloc_hook(struct kmem_cache *s,
-							   size_t objects,
-							   gfp_t flags)
+/*
+ * Returns false if the allocation should fail.
+ */
+static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
+					     struct obj_cgroup **objcgp,
+					     size_t objects, gfp_t flags)
 {
 	struct obj_cgroup *objcg;
 
+	if (!memcg_kmem_enabled())
+		return true;
+
+	if (!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT))
+		return true;
+
 	objcg = get_obj_cgroup_from_current();
 	if (!objcg)
-		return NULL;
+		return true;
 
 	if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s))) {
 		obj_cgroup_put(objcg);
-		return NULL;
+		return false;
 	}
 
-	return objcg;
+	*objcgp = objcg;
+	return true;
 }
 
 static inline void mod_objcg_state(struct obj_cgroup *objcg,
@@ -315,7 +325,7 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
 	unsigned long off;
 	size_t i;
 
-	if (!objcg)
+	if (!memcg_kmem_enabled() || !objcg)
 		return;
 
 	flags &= ~__GFP_ACCOUNT;
@@ -400,11 +410,11 @@ static inline void memcg_free_page_obj_cgroups(struct page *page)
 {
 }
 
-static inline struct obj_cgroup *memcg_slab_pre_alloc_hook(struct kmem_cache *s,
-							   size_t objects,
-							   gfp_t flags)
+static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
+					     struct obj_cgroup **objcgp,
+					     size_t objects, gfp_t flags)
 {
-	return NULL;
+	return true;
 }
 
 static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
@@ -508,9 +518,8 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
 	if (should_failslab(s, flags))
 		return NULL;
 
-	if (memcg_kmem_enabled() &&
-	    ((flags & __GFP_ACCOUNT) || (s->flags & SLAB_ACCOUNT)))
-		*objcgp = memcg_slab_pre_alloc_hook(s, size, flags);
+	if (!memcg_slab_pre_alloc_hook(s, objcgp, size, flags))
+		return NULL;
 
 	return s;
 }
@@ -529,8 +538,7 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s,
 					 s->flags, flags);
 	}
 
-	if (memcg_kmem_enabled())
-		memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
+	memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
 }
 
 #ifndef CONFIG_SLOB
diff --git a/mm/swapfile.c b/mm/swapfile.c
index c4a613688a17..d58361109066 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2867,6 +2867,7 @@ late_initcall(max_swapfiles_check);
 static struct swap_info_struct *alloc_swap_info(void)
 {
 	struct swap_info_struct *p;
+	struct swap_info_struct *defer = NULL;
 	unsigned int type;
 	int i;
 
@@ -2895,7 +2896,7 @@ static struct swap_info_struct *alloc_swap_info(void)
 		smp_wmb();
 		WRITE_ONCE(nr_swapfiles, nr_swapfiles + 1);
 	} else {
-		kvfree(p);
+		defer = p;
 		p = swap_info[type];
 		/*
 		 * Do not memset this entry: a racing procfs swap_next()
@@ -2908,6 +2909,7 @@ static struct swap_info_struct *alloc_swap_info(void)
 		plist_node_init(&p->avail_lists[i], 0);
 	p->flags = SWP_USED;
 	spin_unlock(&swap_lock);
+	kvfree(defer);
 	spin_lock_init(&p->lock);
 	spin_lock_init(&p->cont_lock);
 
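The swapfile.c hunk above defers kvfree() of the losing allocation until swap_lock is dropped, since freeing vmalloc'ed memory may sleep and must not happen under a spinlock. The generic shape of that defer-free pattern, with a pthread mutex standing in for the spinlock and illustrative names throughout:

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static void *slot;

/* install `fresh` if the slot is empty; otherwise free it, but only
 * after the lock has been released */
static void install_or_discard(void *fresh)
{
	void *defer = NULL;

	pthread_mutex_lock(&lock);
	if (!slot)
		slot = fresh;		/* we won the race: keep the allocation */
	else
		defer = fresh;		/* we lost: remember it for later */
	pthread_mutex_unlock(&lock);

	free(defer);			/* free(NULL) is a harmless no-op */
}

int main(void)
{
	install_or_discard(malloc(32));	/* installed */
	install_or_discard(malloc(32));	/* discarded outside the lock */
	return 0;
}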
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 918c7b019b3d..cdfaaadea8ff 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -293,11 +293,7 @@ struct zspage {
 };
 
 struct mapping_area {
-#ifdef CONFIG_ZSMALLOC_PGTABLE_MAPPING
-	struct vm_struct *vm; /* vm area for mapping object that span pages */
-#else
 	char *vm_buf; /* copy buffer for objects that span pages */
-#endif
 	char *vm_addr; /* address of kmap_atomic()'ed pages */
 	enum zs_mapmode vm_mm; /* mapping mode */
 };
@@ -1113,54 +1109,6 @@ static struct zspage *find_get_zspage(struct size_class *class)
 	return zspage;
 }
 
-#ifdef CONFIG_ZSMALLOC_PGTABLE_MAPPING
-static inline int __zs_cpu_up(struct mapping_area *area)
-{
-	/*
-	 * Make sure we don't leak memory if a cpu UP notification
-	 * and zs_init() race and both call zs_cpu_up() on the same cpu
-	 */
-	if (area->vm)
-		return 0;
-	area->vm = get_vm_area(PAGE_SIZE * 2, 0);
-	if (!area->vm)
-		return -ENOMEM;
-
-	/*
-	 * Populate ptes in advance to avoid pte allocation with GFP_KERNEL
-	 * in non-preemtible context of zs_map_object.
-	 */
-	return apply_to_page_range(&init_mm, (unsigned long)area->vm->addr,
-			PAGE_SIZE * 2, NULL, NULL);
-}
-
-static inline void __zs_cpu_down(struct mapping_area *area)
-{
-	if (area->vm)
-		free_vm_area(area->vm);
-	area->vm = NULL;
-}
-
-static inline void *__zs_map_object(struct mapping_area *area,
-				struct page *pages[2], int off, int size)
-{
-	unsigned long addr = (unsigned long)area->vm->addr;
-
-	BUG_ON(map_kernel_range(addr, PAGE_SIZE * 2, PAGE_KERNEL, pages) < 0);
-	area->vm_addr = area->vm->addr;
-	return area->vm_addr + off;
-}
-
-static inline void __zs_unmap_object(struct mapping_area *area,
-				struct page *pages[2], int off, int size)
-{
-	unsigned long addr = (unsigned long)area->vm_addr;
-
-	unmap_kernel_range(addr, PAGE_SIZE * 2);
-}
-
-#else /* CONFIG_ZSMALLOC_PGTABLE_MAPPING */
-
 static inline int __zs_cpu_up(struct mapping_area *area)
 {
 	/*
@@ -1241,8 +1189,6 @@ out:
 	pagefault_enable();
 }
 
-#endif /* CONFIG_ZSMALLOC_PGTABLE_MAPPING */
-
 static int zs_cpu_prepare(unsigned int cpu)
 {
 	struct mapping_area *area;
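With ZSMALLOC_PGTABLE_MAPPING gone, the copy-based path kept above is the only way zsmalloc maps an object that straddles two pages: the two pieces are copied into a contiguous per-cpu buffer. A simplified sketch of that copy (the real __zs_map_object also kmaps the pages before copying; names and sizes here are illustrative):

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096

static void map_by_copy(char *buf, const char *pg0, const char *pg1,
			int off, int size)
{
	int first = PAGE_SIZE - off;	/* bytes available in the first page */

	if (size <= first) {
		memcpy(buf, pg0 + off, size);	/* object fits entirely in one page */
		return;
	}
	memcpy(buf, pg0 + off, first);		/* tail of the first page */
	memcpy(buf + first, pg1, size - first);	/* head of the second page */
}

int main(void)
{
	static char pg0[PAGE_SIZE], pg1[PAGE_SIZE];
	char buf[64];

	memset(pg0 + PAGE_SIZE - 16, 'A', 16);	/* object starts 16 bytes before the page end */
	memset(pg1, 'B', 48);
	map_by_copy(buf, pg0, pg1, PAGE_SIZE - 16, 64);
	printf("%.2s...%.2s\n", buf, buf + 62);	/* AA...BB */
	return 0;
}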