diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-17 08:58:04 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-17 08:58:04 -0700 |
commit | 57a8ec387e1441ea5e1232bc0749fb99a8cba7e7 (patch) | |
tree | b5fb03fc6bc5754de8b5b1f8b0e4f36d67c8315c /mm | |
parent | 0a8ad0ffa4d80a544f6cbff703bf6394339afcdf (diff) | |
parent | 43e11fa2d1d3b6e35629fa556eb7d571edba2010 (diff) | |
download | lwn-57a8ec387e1441ea5e1232bc0749fb99a8cba7e7.tar.gz lwn-57a8ec387e1441ea5e1232bc0749fb99a8cba7e7.zip |
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:
"VM:
- z3fold fixes and enhancements by Henry Burns and Vitaly Wool
- more accurate reclaimed slab caches calculations by Yafang Shao
- fix MAP_UNINITIALIZED UAPI symbol to not depend on config, by
Christoph Hellwig
- !CONFIG_MMU fixes by Christoph Hellwig
- new novmcoredd parameter to omit device dumps from vmcore, by
Kairui Song
- new test_meminit module for testing heap and pagealloc
initialization, by Alexander Potapenko
- ioremap improvements for huge mappings, by Anshuman Khandual
- generalize kprobe page fault handling, by Anshuman Khandual
- device-dax hotplug fixes and improvements, by Pavel Tatashin
- enable synchronous DAX fault on powerpc, by Aneesh Kumar K.V
- add pte_devmap() support for arm64, by Robin Murphy
- unify locked_vm accounting with a helper, by Daniel Jordan
- several misc fixes
core/lib:
- new typeof_member() macro including some users, by Alexey Dobriyan
- make BIT() and GENMASK() available in asm, by Masahiro Yamada
- changed LIST_POISON2 on x86_64 to 0xdead000000000122 for better
code generation, by Alexey Dobriyan
- rbtree code size optimizations, by Michel Lespinasse
- convert struct pid count to refcount_t, by Joel Fernandes
get_maintainer.pl:
- add --no-moderated switch to skip moderated ML's, by Joe Perches
misc:
- ptrace PTRACE_GET_SYSCALL_INFO interface
- coda updates
- gdb scripts, various"
[ Using merge message suggestion from Vlastimil Babka, with some editing - Linus ]
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (100 commits)
fs/select.c: use struct_size() in kmalloc()
mm: add account_locked_vm utility function
arm64: mm: implement pte_devmap support
mm: introduce ARCH_HAS_PTE_DEVMAP
mm: clean up is_device_*_page() definitions
mm/mmap: move common defines to mman-common.h
mm: move MAP_SYNC to asm-generic/mman-common.h
device-dax: "Hotremove" persistent memory that is used like normal RAM
mm/hotplug: make remove_memory() interface usable
device-dax: fix memory and resource leak if hotplug fails
include/linux/lz4.h: fix spelling and copy-paste errors in documentation
ipc/mqueue.c: only perform resource calculation if user valid
include/asm-generic/bug.h: fix "cut here" for WARN_ON for __WARN_TAINT architectures
scripts/gdb: add helpers to find and list devices
scripts/gdb: add lx-genpd-summary command
drivers/pps/pps.c: clear offset flags in PPS_SETPARAMS ioctl
kernel/pid.c: convert struct pid count to refcount_t
drivers/rapidio/devices/rio_mport_cdev.c: NUL terminate some strings
select: shift restore_saved_sigmask_unless() into poll_select_copy_remaining()
select: change do_poll() to return -ERESTARTNOHAND rather than -EINTR
...
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 5 | ||||
-rw-r--r-- | mm/cma.c | 15 | ||||
-rw-r--r-- | mm/gup.c | 2 | ||||
-rw-r--r-- | mm/memcontrol.c | 22 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 64 | ||||
-rw-r--r-- | mm/nommu.c | 4 | ||||
-rw-r--r-- | mm/page_alloc.c | 4 | ||||
-rw-r--r-- | mm/shmem.c | 4 | ||||
-rw-r--r-- | mm/slab_common.c | 3 | ||||
-rw-r--r-- | mm/util.c | 75 | ||||
-rw-r--r-- | mm/vmscan.c | 44 | ||||
-rw-r--r-- | mm/z3fold.c | 29 |
12 files changed, 206 insertions, 65 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index 495d7368ced8..56cec636a1fc 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -649,8 +649,7 @@ config IDLE_PAGE_TRACKING See Documentation/admin-guide/mm/idle_page_tracking.rst for more details. -# arch_add_memory() comprehends device memory -config ARCH_HAS_ZONE_DEVICE +config ARCH_HAS_PTE_DEVMAP bool config ZONE_DEVICE @@ -658,7 +657,7 @@ config ZONE_DEVICE depends on MEMORY_HOTPLUG depends on MEMORY_HOTREMOVE depends on SPARSEMEM_VMEMMAP - depends on ARCH_HAS_ZONE_DEVICE + depends on ARCH_HAS_PTE_DEVMAP select XARRAY_MULTI help @@ -278,6 +278,12 @@ int __init cma_declare_contiguous(phys_addr_t base, */ alignment = max(alignment, (phys_addr_t)PAGE_SIZE << max_t(unsigned long, MAX_ORDER - 1, pageblock_order)); + if (fixed && base & (alignment - 1)) { + ret = -EINVAL; + pr_err("Region at %pa must be aligned to %pa bytes\n", + &base, &alignment); + goto err; + } base = ALIGN(base, alignment); size = ALIGN(size, alignment); limit &= ~(alignment - 1); @@ -308,6 +314,13 @@ int __init cma_declare_contiguous(phys_addr_t base, if (limit == 0 || limit > memblock_end) limit = memblock_end; + if (base + size > limit) { + ret = -EINVAL; + pr_err("Size (%pa) of region at %pa exceeds limit (%pa)\n", + &size, &base, &limit); + goto err; + } + /* Reserve memory */ if (fixed) { if (memblock_is_region_reserved(base, size) || @@ -494,7 +507,7 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, * @pages: Allocated pages. * @count: Number of allocated pages. * - * This function releases memory allocated by alloc_cma(). + * This function releases memory allocated by cma_alloc(). * It returns false when provided pages do not belong to contiguous area and * true otherwise. */ @@ -1895,7 +1895,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, } #endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */ -#if defined(__HAVE_ARCH_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE) +#if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE) static int __gup_device_huge(unsigned long pfn, unsigned long addr, unsigned long end, struct page **pages, int *nr) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 249671873aa9..cdbb7a84cb6e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -695,12 +695,15 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val) if (mem_cgroup_disabled()) return; - __this_cpu_add(memcg->vmstats_local->stat[idx], val); - x = val + __this_cpu_read(memcg->vmstats_percpu->stat[idx]); if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) { struct mem_cgroup *mi; + /* + * Batch local counters to keep them in sync with + * the hierarchical ones. + */ + __this_cpu_add(memcg->vmstats_local->stat[idx], x); for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) atomic_long_add(x, &mi->vmstats[idx]); x = 0; @@ -749,13 +752,15 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, /* Update memcg */ __mod_memcg_state(memcg, idx, val); - /* Update lruvec */ - __this_cpu_add(pn->lruvec_stat_local->count[idx], val); - x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]); if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) { struct mem_cgroup_per_node *pi; + /* + * Batch local counters to keep them in sync with + * the hierarchical ones. + */ + __this_cpu_add(pn->lruvec_stat_local->count[idx], x); for (pi = pn; pi; pi = parent_nodeinfo(pi, pgdat->node_id)) atomic_long_add(x, &pi->lruvec_stat[idx]); x = 0; @@ -777,12 +782,15 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx, if (mem_cgroup_disabled()) return; - __this_cpu_add(memcg->vmstats_local->events[idx], count); - x = count + __this_cpu_read(memcg->vmstats_percpu->events[idx]); if (unlikely(x > MEMCG_CHARGE_BATCH)) { struct mem_cgroup *mi; + /* + * Batch local counters to keep them in sync with + * the hierarchical ones. + */ + __this_cpu_add(memcg->vmstats_local->events[idx], x); for (mi = memcg; mi; mi = parent_mem_cgroup(mi)) atomic_long_add(x, &mi->vmevents[idx]); x = 0; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 6166ba5a15f3..4ebe696138e8 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1734,9 +1734,10 @@ static int check_memblock_offlined_cb(struct memory_block *mem, void *arg) endpa = PFN_PHYS(section_nr_to_pfn(mem->end_section_nr + 1))-1; pr_warn("removing memory fails, because memory [%pa-%pa] is onlined\n", &beginpa, &endpa); - } - return ret; + return -EBUSY; + } + return 0; } static int check_cpu_on_node(pg_data_t *pgdat) @@ -1819,19 +1820,9 @@ static void __release_memory_resource(resource_size_t start, } } -/** - * remove_memory - * @nid: the node ID - * @start: physical address of the region to remove - * @size: size of the region to remove - * - * NOTE: The caller must call lock_device_hotplug() to serialize hotplug - * and online/offline operations before this call, as required by - * try_offline_node(). - */ -void __ref __remove_memory(int nid, u64 start, u64 size) +static int __ref try_remove_memory(int nid, u64 start, u64 size) { - int ret; + int rc = 0; BUG_ON(check_hotplug_memory_range(start, size)); @@ -1839,13 +1830,13 @@ void __ref __remove_memory(int nid, u64 start, u64 size) /* * All memory blocks must be offlined before removing memory. Check - * whether all memory blocks in question are offline and trigger a BUG() + * whether all memory blocks in question are offline and return error * if this is not the case. */ - ret = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL, - check_memblock_offlined_cb); - if (ret) - BUG(); + rc = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL, + check_memblock_offlined_cb); + if (rc) + goto done; /* remove memmap entry */ firmware_map_remove(start, start + size, "System RAM"); @@ -1857,14 +1848,45 @@ void __ref __remove_memory(int nid, u64 start, u64 size) try_offline_node(nid); +done: mem_hotplug_done(); + return rc; } -void remove_memory(int nid, u64 start, u64 size) +/** + * remove_memory + * @nid: the node ID + * @start: physical address of the region to remove + * @size: size of the region to remove + * + * NOTE: The caller must call lock_device_hotplug() to serialize hotplug + * and online/offline operations before this call, as required by + * try_offline_node(). + */ +void __remove_memory(int nid, u64 start, u64 size) +{ + + /* + * trigger BUG() is some memory is not offlined prior to calling this + * function + */ + if (try_remove_memory(nid, start, size)) + BUG(); +} + +/* + * Remove memory if every memory block is offline, otherwise return -EBUSY is + * some memory is not offline + */ +int remove_memory(int nid, u64 start, u64 size) { + int rc; + lock_device_hotplug(); - __remove_memory(nid, start, size); + rc = try_remove_memory(nid, start, size); unlock_device_hotplug(); + + return rc; } EXPORT_SYMBOL_GPL(remove_memory); #endif /* CONFIG_MEMORY_HOTREMOVE */ diff --git a/mm/nommu.c b/mm/nommu.c index eb3e2e558da1..fed1b6e9c89b 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1261,7 +1261,9 @@ unsigned long do_mmap(struct file *file, add_nommu_region(region); /* clear anonymous mappings that don't ask for uninitialized data */ - if (!vma->vm_file && !(flags & MAP_UNINITIALIZED)) + if (!vma->vm_file && + (!IS_ENABLED(CONFIG_MMAP_ALLOW_UNINITIALIZED) || + !(flags & MAP_UNINITIALIZED))) memset((void *)region->vm_start, 0, region->vm_end - region->vm_start); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8fd7f45a04eb..e515bfcf7f28 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4102,7 +4102,6 @@ static int __perform_reclaim(gfp_t gfp_mask, unsigned int order, const struct alloc_context *ac) { - struct reclaim_state reclaim_state; int progress; unsigned int noreclaim_flag; unsigned long pflags; @@ -4114,13 +4113,10 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, psi_memstall_enter(&pflags); fs_reclaim_acquire(gfp_mask); noreclaim_flag = memalloc_noreclaim_save(); - reclaim_state.reclaimed_slab = 0; - current->reclaim_state = &reclaim_state; progress = try_to_free_pages(ac->zonelist, order, gfp_mask, ac->nodemask); - current->reclaim_state = NULL; memalloc_noreclaim_restore(noreclaim_flag); fs_reclaim_release(gfp_mask); psi_memstall_leave(&pflags); diff --git a/mm/shmem.c b/mm/shmem.c index f4dce9c8670d..99497cb32e71 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -400,7 +400,7 @@ static bool shmem_confirm_swap(struct address_space *mapping, static int shmem_huge __read_mostly; -#if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS) +#if defined(CONFIG_SYSFS) static int shmem_parse_huge(const char *str) { if (!strcmp(str, "never")) @@ -417,7 +417,9 @@ static int shmem_parse_huge(const char *str) return SHMEM_HUGE_FORCE; return -EINVAL; } +#endif +#if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS) static const char *shmem_format_huge(int huge) { switch (huge) { diff --git a/mm/slab_common.c b/mm/slab_common.c index 6c49dbb3769e..807490fe217a 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1028,7 +1028,8 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name, } struct kmem_cache * -kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init; +kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init = +{ /* initialization for https://bugs.llvm.org/show_bug.cgi?id=42570 */ }; EXPORT_SYMBOL(kmalloc_caches); /* diff --git a/mm/util.c b/mm/util.c index 68575a315dc5..e6351a80f248 100644 --- a/mm/util.c +++ b/mm/util.c @@ -7,6 +7,7 @@ #include <linux/err.h> #include <linux/sched.h> #include <linux/sched/mm.h> +#include <linux/sched/signal.h> #include <linux/sched/task_stack.h> #include <linux/security.h> #include <linux/swap.h> @@ -300,6 +301,80 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) } #endif +/** + * __account_locked_vm - account locked pages to an mm's locked_vm + * @mm: mm to account against + * @pages: number of pages to account + * @inc: %true if @pages should be considered positive, %false if not + * @task: task used to check RLIMIT_MEMLOCK + * @bypass_rlim: %true if checking RLIMIT_MEMLOCK should be skipped + * + * Assumes @task and @mm are valid (i.e. at least one reference on each), and + * that mmap_sem is held as writer. + * + * Return: + * * 0 on success + * * -ENOMEM if RLIMIT_MEMLOCK would be exceeded. + */ +int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc, + struct task_struct *task, bool bypass_rlim) +{ + unsigned long locked_vm, limit; + int ret = 0; + + lockdep_assert_held_write(&mm->mmap_sem); + + locked_vm = mm->locked_vm; + if (inc) { + if (!bypass_rlim) { + limit = task_rlimit(task, RLIMIT_MEMLOCK) >> PAGE_SHIFT; + if (locked_vm + pages > limit) + ret = -ENOMEM; + } + if (!ret) + mm->locked_vm = locked_vm + pages; + } else { + WARN_ON_ONCE(pages > locked_vm); + mm->locked_vm = locked_vm - pages; + } + + pr_debug("%s: [%d] caller %ps %c%lu %lu/%lu%s\n", __func__, task->pid, + (void *)_RET_IP_, (inc) ? '+' : '-', pages << PAGE_SHIFT, + locked_vm << PAGE_SHIFT, task_rlimit(task, RLIMIT_MEMLOCK), + ret ? " - exceeded" : ""); + + return ret; +} +EXPORT_SYMBOL_GPL(__account_locked_vm); + +/** + * account_locked_vm - account locked pages to an mm's locked_vm + * @mm: mm to account against, may be NULL + * @pages: number of pages to account + * @inc: %true if @pages should be considered positive, %false if not + * + * Assumes a non-NULL @mm is valid (i.e. at least one reference on it). + * + * Return: + * * 0 on success, or if mm is NULL + * * -ENOMEM if RLIMIT_MEMLOCK would be exceeded. + */ +int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc) +{ + int ret; + + if (pages == 0 || !mm) + return 0; + + down_write(&mm->mmap_sem); + ret = __account_locked_vm(mm, pages, inc, current, + capable(CAP_IPC_LOCK)); + up_write(&mm->mmap_sem); + + return ret; +} +EXPORT_SYMBOL_GPL(account_locked_vm); + unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long pgoff) diff --git a/mm/vmscan.c b/mm/vmscan.c index f8e3dcd527b8..44df66a98f2a 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -131,6 +131,9 @@ struct scan_control { unsigned int file_taken; unsigned int taken; } nr; + + /* for recording the reclaimed slab by now */ + struct reclaim_state reclaim_state; }; #ifdef ARCH_HAS_PREFETCH @@ -238,6 +241,18 @@ static void unregister_memcg_shrinker(struct shrinker *shrinker) } #endif /* CONFIG_MEMCG_KMEM */ +static void set_task_reclaim_state(struct task_struct *task, + struct reclaim_state *rs) +{ + /* Check for an overwrite */ + WARN_ON_ONCE(rs && task->reclaim_state); + + /* Check for the nulling of an already-nulled member */ + WARN_ON_ONCE(!rs && !task->reclaim_state); + + task->reclaim_state = rs; +} + #ifdef CONFIG_MEMCG static bool global_reclaim(struct scan_control *sc) { @@ -3191,11 +3206,13 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, if (throttle_direct_reclaim(sc.gfp_mask, zonelist, nodemask)) return 1; + set_task_reclaim_state(current, &sc.reclaim_state); trace_mm_vmscan_direct_reclaim_begin(order, sc.gfp_mask); nr_reclaimed = do_try_to_free_pages(zonelist, &sc); trace_mm_vmscan_direct_reclaim_end(nr_reclaimed); + set_task_reclaim_state(current, NULL); return nr_reclaimed; } @@ -3218,6 +3235,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg, }; unsigned long lru_pages; + set_task_reclaim_state(current, &sc.reclaim_state); sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); @@ -3235,7 +3253,9 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg, trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); + set_task_reclaim_state(current, NULL); *nr_scanned = sc.nr_scanned; + return sc.nr_reclaimed; } @@ -3262,6 +3282,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, .may_shrinkslab = 1, }; + set_task_reclaim_state(current, &sc.reclaim_state); /* * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't * take care of from where we get pages. So the node where we start the @@ -3282,6 +3303,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, psi_memstall_leave(&pflags); trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed); + set_task_reclaim_state(current, NULL); return nr_reclaimed; } @@ -3483,6 +3505,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) .may_unmap = 1, }; + set_task_reclaim_state(current, &sc.reclaim_state); psi_memstall_enter(&pflags); __fs_reclaim_acquire(); @@ -3664,6 +3687,8 @@ out: snapshot_refaults(NULL, pgdat); __fs_reclaim_release(); psi_memstall_leave(&pflags); + set_task_reclaim_state(current, NULL); + /* * Return the order kswapd stopped reclaiming at as * prepare_kswapd_sleep() takes it into account. If another caller @@ -3787,15 +3812,10 @@ static int kswapd(void *p) unsigned int classzone_idx = MAX_NR_ZONES - 1; pg_data_t *pgdat = (pg_data_t*)p; struct task_struct *tsk = current; - - struct reclaim_state reclaim_state = { - .reclaimed_slab = 0, - }; const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); if (!cpumask_empty(cpumask)) set_cpus_allowed_ptr(tsk, cpumask); - current->reclaim_state = &reclaim_state; /* * Tell the memory management that we're a "memory allocator", @@ -3857,7 +3877,6 @@ kswapd_try_sleep: } tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD); - current->reclaim_state = NULL; return 0; } @@ -3922,7 +3941,6 @@ void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order, */ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) { - struct reclaim_state reclaim_state; struct scan_control sc = { .nr_to_reclaim = nr_to_reclaim, .gfp_mask = GFP_HIGHUSER_MOVABLE, @@ -3934,18 +3952,16 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) .hibernation_mode = 1, }; struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask); - struct task_struct *p = current; unsigned long nr_reclaimed; unsigned int noreclaim_flag; fs_reclaim_acquire(sc.gfp_mask); noreclaim_flag = memalloc_noreclaim_save(); - reclaim_state.reclaimed_slab = 0; - p->reclaim_state = &reclaim_state; + set_task_reclaim_state(current, &sc.reclaim_state); nr_reclaimed = do_try_to_free_pages(zonelist, &sc); - p->reclaim_state = NULL; + set_task_reclaim_state(current, NULL); memalloc_noreclaim_restore(noreclaim_flag); fs_reclaim_release(sc.gfp_mask); @@ -4110,7 +4126,6 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in /* Minimum pages needed in order to stay on node */ const unsigned long nr_pages = 1 << order; struct task_struct *p = current; - struct reclaim_state reclaim_state; unsigned int noreclaim_flag; struct scan_control sc = { .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX), @@ -4135,8 +4150,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in */ noreclaim_flag = memalloc_noreclaim_save(); p->flags |= PF_SWAPWRITE; - reclaim_state.reclaimed_slab = 0; - p->reclaim_state = &reclaim_state; + set_task_reclaim_state(p, &sc.reclaim_state); if (node_pagecache_reclaimable(pgdat) > pgdat->min_unmapped_pages) { /* @@ -4148,7 +4162,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in } while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0); } - p->reclaim_state = NULL; + set_task_reclaim_state(p, NULL); current->flags &= ~PF_SWAPWRITE; memalloc_noreclaim_restore(noreclaim_flag); fs_reclaim_release(sc.gfp_mask); diff --git a/mm/z3fold.c b/mm/z3fold.c index dfcd69d08c1e..6c72b18d8b9c 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -101,6 +101,7 @@ struct z3fold_buddy_slots { * @refcount: reference count for the z3fold page * @work: work_struct for page layout optimization * @slots: pointer to the structure holding buddy slots + * @pool: pointer to the containing pool * @cpu: CPU which this page "belongs" to * @first_chunks: the size of the first buddy in chunks, 0 if free * @middle_chunks: the size of the middle buddy in chunks, 0 if free @@ -114,6 +115,7 @@ struct z3fold_header { struct kref refcount; struct work_struct work; struct z3fold_buddy_slots *slots; + struct z3fold_pool *pool; short cpu; unsigned short first_chunks; unsigned short middle_chunks; @@ -193,8 +195,10 @@ static void compact_page_work(struct work_struct *w); static inline struct z3fold_buddy_slots *alloc_slots(struct z3fold_pool *pool, gfp_t gfp) { - struct z3fold_buddy_slots *slots = kmem_cache_alloc(pool->c_handle, - gfp); + struct z3fold_buddy_slots *slots; + + slots = kmem_cache_alloc(pool->c_handle, + (gfp & ~(__GFP_HIGHMEM | __GFP_MOVABLE))); if (slots) { memset(slots->slot, 0, sizeof(slots->slot)); @@ -320,6 +324,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page, zhdr->start_middle = 0; zhdr->cpu = -1; zhdr->slots = slots; + zhdr->pool = pool; INIT_LIST_HEAD(&zhdr->buddy); INIT_WORK(&zhdr->work, compact_page_work); return zhdr; @@ -426,7 +431,7 @@ static enum buddy handle_to_buddy(unsigned long handle) static inline struct z3fold_pool *zhdr_to_pool(struct z3fold_header *zhdr) { - return slots_to_pool(zhdr->slots); + return zhdr->pool; } static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked) @@ -850,7 +855,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp, enum buddy bud; bool can_sleep = gfpflags_allow_blocking(gfp); - if (!size || (gfp & __GFP_HIGHMEM)) + if (!size) return -EINVAL; if (size > PAGE_SIZE) @@ -1345,24 +1350,29 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa zhdr = page_address(page); pool = zhdr_to_pool(zhdr); - if (!trylock_page(page)) - return -EAGAIN; - if (!z3fold_page_trylock(zhdr)) { - unlock_page(page); return -EAGAIN; } if (zhdr->mapped_count != 0) { z3fold_page_unlock(zhdr); - unlock_page(page); return -EBUSY; } + if (work_pending(&zhdr->work)) { + z3fold_page_unlock(zhdr); + return -EAGAIN; + } new_zhdr = page_address(newpage); memcpy(new_zhdr, zhdr, PAGE_SIZE); newpage->private = page->private; page->private = 0; z3fold_page_unlock(zhdr); spin_lock_init(&new_zhdr->page_lock); + INIT_WORK(&new_zhdr->work, compact_page_work); + /* + * z3fold_page_isolate() ensures that new_zhdr->buddy is empty, + * so we only have to reinitialize it. + */ + INIT_LIST_HEAD(&new_zhdr->buddy); new_mapping = page_mapping(page); __ClearPageMovable(page); ClearPagePrivate(page); @@ -1386,7 +1396,6 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work); page_mapcount_reset(page); - unlock_page(page); put_page(page); return 0; } |