summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-04-02 13:55:34 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-04-02 13:55:34 -0700
commit6cad420cc695867b4ca710bac21fde21a4102e4b (patch)
tree890d42abc1e82c2cf5cef583584f88ca70116ce9 /include
parent7be97138e7276c71cc9ad1752dcb502d28f4400d (diff)
parent77d6b9094819ba55353de0ef92957f3f54f2c36c (diff)
downloadlwn-6cad420cc695867b4ca710bac21fde21a4102e4b.tar.gz
lwn-6cad420cc695867b4ca710bac21fde21a4102e4b.zip
Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton: "A large amount of MM, plenty more to come. Subsystems affected by this patch series: - tools - kthread - kbuild - scripts - ocfs2 - vfs - mm: slub, kmemleak, pagecache, gup, swap, memcg, pagemap, mremap, sparsemem, kasan, pagealloc, vmscan, compaction, mempolicy, hugetlbfs, hugetlb" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (155 commits) include/linux/huge_mm.h: check PageTail in hpage_nr_pages even when !THP mm/hugetlb: fix build failure with HUGETLB_PAGE but not HUGEBTLBFS selftests/vm: fix map_hugetlb length used for testing read and write mm/hugetlb: remove unnecessary memory fetch in PageHeadHuge() mm/hugetlb.c: clean code by removing unnecessary initialization hugetlb_cgroup: add hugetlb_cgroup reservation docs hugetlb_cgroup: add hugetlb_cgroup reservation tests hugetlb: support file_region coalescing again hugetlb_cgroup: support noreserve mappings hugetlb_cgroup: add accounting for shared mappings hugetlb: disable region_add file_region coalescing hugetlb_cgroup: add reservation accounting for private mappings mm/hugetlb_cgroup: fix hugetlb_cgroup migration hugetlb_cgroup: add interface for charge/uncharge hugetlb reservations hugetlb_cgroup: add hugetlb_cgroup reservation counter hugetlbfs: Use i_mmap_rwsem to address page fault/truncate race hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization mm/memblock.c: remove redundant assignment to variable max_addr mm: mempolicy: require at least one nodeid for MPOL_PREFERRED mm: mempolicy: use VM_BUG_ON_VMA in queue_pages_test_walk() ...
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/Kbuild52
-rw-r--r--include/linux/cgroup-defs.h5
-rw-r--r--include/linux/fs.h5
-rw-r--r--include/linux/gfp.h6
-rw-r--r--include/linux/huge_mm.h10
-rw-r--r--include/linux/hugetlb.h76
-rw-r--r--include/linux/hugetlb_cgroup.h169
-rw-r--r--include/linux/kasan.h2
-rw-r--r--include/linux/kthread.h3
-rw-r--r--include/linux/memcontrol.h42
-rw-r--r--include/linux/mempolicy.h29
-rw-r--r--include/linux/mm.h239
-rw-r--r--include/linux/mm_types.h7
-rw-r--r--include/linux/mmzone.h6
-rw-r--r--include/linux/page_ref.h9
-rw-r--r--include/linux/pagemap.h29
-rw-r--r--include/linux/sched/signal.h16
-rw-r--r--include/linux/swap.h1
-rw-r--r--include/linux/topology.h17
-rw-r--r--include/trace/events/mmap.h48
-rw-r--r--include/uapi/linux/mman.h5
21 files changed, 609 insertions, 167 deletions
diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index cd17d50697cc..36341dfded70 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -4,6 +4,58 @@
# (This file is not included when SRCARCH=um since UML borrows several
# asm headers from the host architecutre.)
+mandatory-y += atomic.h
+mandatory-y += barrier.h
+mandatory-y += bitops.h
+mandatory-y += bug.h
+mandatory-y += bugs.h
+mandatory-y += cacheflush.h
+mandatory-y += checksum.h
+mandatory-y += compat.h
+mandatory-y += current.h
+mandatory-y += delay.h
+mandatory-y += device.h
+mandatory-y += div64.h
mandatory-y += dma-contiguous.h
+mandatory-y += dma-mapping.h
+mandatory-y += dma.h
+mandatory-y += emergency-restart.h
+mandatory-y += exec.h
+mandatory-y += fb.h
+mandatory-y += ftrace.h
+mandatory-y += futex.h
+mandatory-y += hardirq.h
+mandatory-y += hw_irq.h
+mandatory-y += io.h
+mandatory-y += irq.h
+mandatory-y += irq_regs.h
+mandatory-y += irq_work.h
+mandatory-y += kdebug.h
+mandatory-y += kmap_types.h
+mandatory-y += kprobes.h
+mandatory-y += linkage.h
+mandatory-y += local.h
+mandatory-y += mm-arch-hooks.h
+mandatory-y += mmiowb.h
+mandatory-y += mmu.h
+mandatory-y += mmu_context.h
+mandatory-y += module.h
mandatory-y += msi.h
+mandatory-y += pci.h
+mandatory-y += percpu.h
+mandatory-y += pgalloc.h
+mandatory-y += preempt.h
+mandatory-y += sections.h
+mandatory-y += serial.h
+mandatory-y += shmparam.h
mandatory-y += simd.h
+mandatory-y += switch_to.h
+mandatory-y += timex.h
+mandatory-y += tlbflush.h
+mandatory-y += topology.h
+mandatory-y += trace_clock.h
+mandatory-y += uaccess.h
+mandatory-y += unaligned.h
+mandatory-y += vga.h
+mandatory-y += word-at-a-time.h
+mandatory-y += xor.h
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 63097cb243cb..e1fafed22db1 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -94,6 +94,11 @@ enum {
* Enable legacy local memory.events.
*/
CGRP_ROOT_MEMORY_LOCAL_EVENTS = (1 << 5),
+
+ /*
+ * Enable recursive subtree protection
+ */
+ CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 6),
};
/* cftype->flags */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3d69de600494..f81c822f4d89 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -526,6 +526,11 @@ static inline void i_mmap_lock_write(struct address_space *mapping)
down_write(&mapping->i_mmap_rwsem);
}
+static inline int i_mmap_trylock_write(struct address_space *mapping)
+{
+ return down_write_trylock(&mapping->i_mmap_rwsem);
+}
+
static inline void i_mmap_unlock_write(struct address_space *mapping)
{
up_write(&mapping->i_mmap_rwsem);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index e5b817cb86e7..be2754841369 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -485,6 +485,12 @@ static inline void arch_free_page(struct page *page, int order) { }
#ifndef HAVE_ARCH_ALLOC_PAGE
static inline void arch_alloc_page(struct page *page, int order) { }
#endif
+#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
+static inline int arch_make_page_accessible(struct page *page)
+{
+ return 0;
+}
+#endif
struct page *
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 5aca3d1bdb32..680a0d9a9721 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -87,8 +87,6 @@ extern struct kobj_attribute shmem_enabled_attr;
#define HPAGE_PUD_SIZE ((1UL) << HPAGE_PUD_SHIFT)
#define HPAGE_PUD_MASK (~(HPAGE_PUD_SIZE - 1))
-extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
-
extern unsigned long transparent_hugepage_flags;
/*
@@ -100,7 +98,7 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
if (vma->vm_flags & VM_NOHUGEPAGE)
return false;
- if (is_vma_temporary_stack(vma))
+ if (vma_is_temporary_stack(vma))
return false;
if (test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
@@ -289,7 +287,11 @@ static inline struct list_head *page_deferred_list(struct page *page)
#define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; })
#define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; })
-#define hpage_nr_pages(x) 1
+static inline int hpage_nr_pages(struct page *page)
+{
+ VM_BUG_ON_PAGE(PageTail(page), page);
+ return 1;
+}
static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
{
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 1e897e4168ac..5ea05879a0a9 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -46,7 +46,52 @@ struct resv_map {
long adds_in_progress;
struct list_head region_cache;
long region_cache_count;
+#ifdef CONFIG_CGROUP_HUGETLB
+ /*
+ * On private mappings, the counter to uncharge reservations is stored
+ * here. If these fields are 0, then either the mapping is shared, or
+ * cgroup accounting is disabled for this resv_map.
+ */
+ struct page_counter *reservation_counter;
+ unsigned long pages_per_hpage;
+ struct cgroup_subsys_state *css;
+#endif
+};
+
+/*
+ * Region tracking -- allows tracking of reservations and instantiated pages
+ * across the pages in a mapping.
+ *
+ * The region data structures are embedded into a resv_map and protected
+ * by a resv_map's lock. The set of regions within the resv_map represent
+ * reservations for huge pages, or huge pages that have already been
+ * instantiated within the map. The from and to elements are huge page
+ * indicies into the associated mapping. from indicates the starting index
+ * of the region. to represents the first index past the end of the region.
+ *
+ * For example, a file region structure with from == 0 and to == 4 represents
+ * four huge pages in a mapping. It is important to note that the to element
+ * represents the first element past the end of the region. This is used in
+ * arithmetic as 4(to) - 0(from) = 4 huge pages in the region.
+ *
+ * Interval notation of the form [from, to) will be used to indicate that
+ * the endpoint from is inclusive and to is exclusive.
+ */
+struct file_region {
+ struct list_head link;
+ long from;
+ long to;
+#ifdef CONFIG_CGROUP_HUGETLB
+ /*
+ * On shared mappings, each reserved region appears as a struct
+ * file_region in resv_map. These fields hold the info needed to
+ * uncharge each reservation.
+ */
+ struct page_counter *reservation_counter;
+ struct cgroup_subsys_state *css;
+#endif
};
+
extern struct resv_map *resv_map_alloc(void);
void resv_map_release(struct kref *ref);
@@ -109,6 +154,8 @@ u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx);
pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
+struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage);
+
extern int sysctl_hugetlb_shm_group;
extern struct list_head huge_boot_pages;
@@ -151,6 +198,12 @@ static inline unsigned long hugetlb_total_pages(void)
return 0;
}
+static inline struct address_space *hugetlb_page_mapping_lock_write(
+ struct page *hpage)
+{
+ return NULL;
+}
+
static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr,
pte_t *ptep)
{
@@ -390,7 +443,10 @@ static inline bool is_file_hugepages(struct file *file)
return is_file_shm_hugepages(file);
}
-
+static inline struct hstate *hstate_inode(struct inode *i)
+{
+ return HUGETLBFS_SB(i->i_sb)->hstate;
+}
#else /* !CONFIG_HUGETLBFS */
#define is_file_hugepages(file) false
@@ -402,6 +458,10 @@ hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag,
return ERR_PTR(-ENOSYS);
}
+static inline struct hstate *hstate_inode(struct inode *i)
+{
+ return NULL;
+}
#endif /* !CONFIG_HUGETLBFS */
#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
@@ -432,8 +492,8 @@ struct hstate {
unsigned int surplus_huge_pages_node[MAX_NUMNODES];
#ifdef CONFIG_CGROUP_HUGETLB
/* cgroup control files */
- struct cftype cgroup_files_dfl[5];
- struct cftype cgroup_files_legacy[5];
+ struct cftype cgroup_files_dfl[7];
+ struct cftype cgroup_files_legacy[9];
#endif
char name[HSTATE_NAME_LEN];
};
@@ -472,11 +532,6 @@ extern unsigned int default_hstate_idx;
#define default_hstate (hstates[default_hstate_idx])
-static inline struct hstate *hstate_inode(struct inode *i)
-{
- return HUGETLBFS_SB(i->i_sb)->hstate;
-}
-
static inline struct hstate *hstate_file(struct file *f)
{
return hstate_inode(file_inode(f));
@@ -729,11 +784,6 @@ static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
return NULL;
}
-static inline struct hstate *hstate_inode(struct inode *i)
-{
- return NULL;
-}
-
static inline struct hstate *page_hstate(struct page *page)
{
return NULL;
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index 063962f6dfc6..2ad6e92f124a 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -18,34 +18,96 @@
#include <linux/mmdebug.h>
struct hugetlb_cgroup;
+struct resv_map;
+struct file_region;
+
/*
* Minimum page order trackable by hugetlb cgroup.
- * At least 3 pages are necessary for all the tracking information.
+ * At least 4 pages are necessary for all the tracking information.
+ * The second tail page (hpage[2]) is the fault usage cgroup.
+ * The third tail page (hpage[3]) is the reservation usage cgroup.
*/
#define HUGETLB_CGROUP_MIN_ORDER 2
#ifdef CONFIG_CGROUP_HUGETLB
+enum hugetlb_memory_event {
+ HUGETLB_MAX,
+ HUGETLB_NR_MEMORY_EVENTS,
+};
-static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
+struct hugetlb_cgroup {
+ struct cgroup_subsys_state css;
+
+ /*
+ * the counter to account for hugepages from hugetlb.
+ */
+ struct page_counter hugepage[HUGE_MAX_HSTATE];
+
+ /*
+ * the counter to account for hugepage reservations from hugetlb.
+ */
+ struct page_counter rsvd_hugepage[HUGE_MAX_HSTATE];
+
+ atomic_long_t events[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS];
+ atomic_long_t events_local[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS];
+
+ /* Handle for "hugetlb.events" */
+ struct cgroup_file events_file[HUGE_MAX_HSTATE];
+
+ /* Handle for "hugetlb.events.local" */
+ struct cgroup_file events_local_file[HUGE_MAX_HSTATE];
+};
+
+static inline struct hugetlb_cgroup *
+__hugetlb_cgroup_from_page(struct page *page, bool rsvd)
{
VM_BUG_ON_PAGE(!PageHuge(page), page);
if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
return NULL;
- return (struct hugetlb_cgroup *)page[2].private;
+ if (rsvd)
+ return (struct hugetlb_cgroup *)page[3].private;
+ else
+ return (struct hugetlb_cgroup *)page[2].private;
+}
+
+static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
+{
+ return __hugetlb_cgroup_from_page(page, false);
+}
+
+static inline struct hugetlb_cgroup *
+hugetlb_cgroup_from_page_rsvd(struct page *page)
+{
+ return __hugetlb_cgroup_from_page(page, true);
}
-static inline
-int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg)
+static inline int __set_hugetlb_cgroup(struct page *page,
+ struct hugetlb_cgroup *h_cg, bool rsvd)
{
VM_BUG_ON_PAGE(!PageHuge(page), page);
if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
return -1;
- page[2].private = (unsigned long)h_cg;
+ if (rsvd)
+ page[3].private = (unsigned long)h_cg;
+ else
+ page[2].private = (unsigned long)h_cg;
return 0;
}
+static inline int set_hugetlb_cgroup(struct page *page,
+ struct hugetlb_cgroup *h_cg)
+{
+ return __set_hugetlb_cgroup(page, h_cg, false);
+}
+
+static inline int set_hugetlb_cgroup_rsvd(struct page *page,
+ struct hugetlb_cgroup *h_cg)
+{
+ return __set_hugetlb_cgroup(page, h_cg, true);
+}
+
static inline bool hugetlb_cgroup_disabled(void)
{
return !cgroup_subsys_enabled(hugetlb_cgrp_subsys);
@@ -53,25 +115,67 @@ static inline bool hugetlb_cgroup_disabled(void)
extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup **ptr);
+extern int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
+ struct hugetlb_cgroup **ptr);
extern void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg,
struct page *page);
+extern void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
+ struct hugetlb_cgroup *h_cg,
+ struct page *page);
extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
struct page *page);
+extern void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages,
+ struct page *page);
+
extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg);
+extern void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
+ struct hugetlb_cgroup *h_cg);
+extern void hugetlb_cgroup_uncharge_counter(struct resv_map *resv,
+ unsigned long start,
+ unsigned long end);
+
+extern void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
+ struct file_region *rg,
+ unsigned long nr_pages);
+
extern void hugetlb_cgroup_file_init(void) __init;
extern void hugetlb_cgroup_migrate(struct page *oldhpage,
struct page *newhpage);
#else
+static inline void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
+ struct file_region *rg,
+ unsigned long nr_pages)
+{
+}
+
static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
{
return NULL;
}
-static inline
-int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg)
+static inline struct hugetlb_cgroup *
+hugetlb_cgroup_from_page_resv(struct page *page)
+{
+ return NULL;
+}
+
+static inline struct hugetlb_cgroup *
+hugetlb_cgroup_from_page_rsvd(struct page *page)
+{
+ return NULL;
+}
+
+static inline int set_hugetlb_cgroup(struct page *page,
+ struct hugetlb_cgroup *h_cg)
+{
+ return 0;
+}
+
+static inline int set_hugetlb_cgroup_rsvd(struct page *page,
+ struct hugetlb_cgroup *h_cg)
{
return 0;
}
@@ -81,28 +185,57 @@ static inline bool hugetlb_cgroup_disabled(void)
return true;
}
-static inline int
-hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
- struct hugetlb_cgroup **ptr)
+static inline int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
+ struct hugetlb_cgroup **ptr)
{
return 0;
}
-static inline void
-hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
- struct hugetlb_cgroup *h_cg,
- struct page *page)
+static inline int hugetlb_cgroup_charge_cgroup_rsvd(int idx,
+ unsigned long nr_pages,
+ struct hugetlb_cgroup **ptr)
+{
+ return 0;
+}
+
+static inline void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
+ struct hugetlb_cgroup *h_cg,
+ struct page *page)
{
}
static inline void
-hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, struct page *page)
+hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
+ struct hugetlb_cgroup *h_cg,
+ struct page *page)
+{
+}
+
+static inline void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
+ struct page *page)
+{
+}
+
+static inline void hugetlb_cgroup_uncharge_page_rsvd(int idx,
+ unsigned long nr_pages,
+ struct page *page)
+{
+}
+static inline void hugetlb_cgroup_uncharge_cgroup(int idx,
+ unsigned long nr_pages,
+ struct hugetlb_cgroup *h_cg)
{
}
static inline void
-hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
- struct hugetlb_cgroup *h_cg)
+hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
+ struct hugetlb_cgroup *h_cg)
+{
+}
+
+static inline void hugetlb_cgroup_uncharge_counter(struct resv_map *resv,
+ unsigned long start,
+ unsigned long end)
{
}
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 5cde9e7c2664..31314ca7c635 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -190,7 +190,7 @@ void kasan_init_tags(void);
void *kasan_reset_tag(const void *addr);
-void kasan_report(unsigned long addr, size_t size,
+bool kasan_report(unsigned long addr, size_t size,
bool is_write, unsigned long ip);
#else /* CONFIG_KASAN_SW_TAGS */
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 0f9da966934e..8bbcaad7ef0f 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -165,7 +165,8 @@ extern void __kthread_init_worker(struct kthread_worker *worker,
do { \
kthread_init_work(&(dwork)->work, (fn)); \
timer_setup(&(dwork)->timer, \
- kthread_delayed_work_timer_fn, 0); \
+ kthread_delayed_work_timer_fn, \
+ TIMER_IRQSAFE); \
} while (0)
int kthread_worker_fn(void *worker_ptr);
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e9ba01336d4e..1b4150ff64be 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1367,12 +1367,11 @@ struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep);
void memcg_kmem_put_cache(struct kmem_cache *cachep);
#ifdef CONFIG_MEMCG_KMEM
-int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order);
-void __memcg_kmem_uncharge(struct page *page, int order);
-int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
- struct mem_cgroup *memcg);
-void __memcg_kmem_uncharge_memcg(struct mem_cgroup *memcg,
- unsigned int nr_pages);
+int __memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp,
+ unsigned int nr_pages);
+void __memcg_kmem_uncharge(struct mem_cgroup *memcg, unsigned int nr_pages);
+int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order);
+void __memcg_kmem_uncharge_page(struct page *page, int order);
extern struct static_key_false memcg_kmem_enabled_key;
extern struct workqueue_struct *memcg_kmem_cache_wq;
@@ -1394,32 +1393,33 @@ static inline bool memcg_kmem_enabled(void)
return static_branch_unlikely(&memcg_kmem_enabled_key);
}
-static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
+static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp,
+ int order)
{
if (memcg_kmem_enabled())
- return __memcg_kmem_charge(page, gfp, order);
+ return __memcg_kmem_charge_page(page, gfp, order);
return 0;
}
-static inline void memcg_kmem_uncharge(struct page *page, int order)
+static inline void memcg_kmem_uncharge_page(struct page *page, int order)
{
if (memcg_kmem_enabled())
- __memcg_kmem_uncharge(page, order);
+ __memcg_kmem_uncharge_page(page, order);
}
-static inline int memcg_kmem_charge_memcg(struct page *page, gfp_t gfp,
- int order, struct mem_cgroup *memcg)
+static inline int memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp,
+ unsigned int nr_pages)
{
if (memcg_kmem_enabled())
- return __memcg_kmem_charge_memcg(page, gfp, order, memcg);
+ return __memcg_kmem_charge(memcg, gfp, nr_pages);
return 0;
}
-static inline void memcg_kmem_uncharge_memcg(struct page *page, int order,
- struct mem_cgroup *memcg)
+static inline void memcg_kmem_uncharge(struct mem_cgroup *memcg,
+ unsigned int nr_pages)
{
if (memcg_kmem_enabled())
- __memcg_kmem_uncharge_memcg(memcg, 1 << order);
+ __memcg_kmem_uncharge(memcg, nr_pages);
}
/*
@@ -1436,21 +1436,23 @@ struct mem_cgroup *mem_cgroup_from_obj(void *p);
#else
-static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
+static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp,
+ int order)
{
return 0;
}
-static inline void memcg_kmem_uncharge(struct page *page, int order)
+static inline void memcg_kmem_uncharge_page(struct page *page, int order)
{
}
-static inline int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
+static inline int __memcg_kmem_charge_page(struct page *page, gfp_t gfp,
+ int order)
{
return 0;
}
-static inline void __memcg_kmem_uncharge(struct page *page, int order)
+static inline void __memcg_kmem_uncharge_page(struct page *page, int order)
{
}
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 5228c62af416..8165278c348a 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -173,34 +173,7 @@ extern int mpol_parse_str(char *str, struct mempolicy **mpol);
extern void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol);
/* Check if a vma is migratable */
-static inline bool vma_migratable(struct vm_area_struct *vma)
-{
- if (vma->vm_flags & (VM_IO | VM_PFNMAP))
- return false;
-
- /*
- * DAX device mappings require predictable access latency, so avoid
- * incurring periodic faults.
- */
- if (vma_is_dax(vma))
- return false;
-
-#ifndef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
- if (vma->vm_flags & VM_HUGETLB)
- return false;
-#endif
-
- /*
- * Migration allocates pages in the highest zone. If we cannot
- * do so then migration (at least from node to node) is not
- * possible.
- */
- if (vma->vm_file &&
- gfp_zone(mapping_gfp_mask(vma->vm_file->f_mapping))
- < policy_zone)
- return false;
- return true;
-}
+extern bool vma_migratable(struct vm_area_struct *vma);
extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long);
extern void mpol_put_task_policy(struct task_struct *);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c54fb96cb1e6..937bf719c329 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -27,6 +27,7 @@
#include <linux/memremap.h>
#include <linux/overflow.h>
#include <linux/sizes.h>
+#include <linux/sched.h>
struct mempolicy;
struct anon_vma;
@@ -356,10 +357,12 @@ extern unsigned int kobjsize(const void *objp);
/*
* Special vmas that are non-mergable, non-mlock()able.
- * Note: mm/huge_memory.c VM_NO_THP depends on this definition.
*/
#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
+/* This mask prevents VMA from being scanned with khugepaged */
+#define VM_NO_KHUGEPAGED (VM_SPECIAL | VM_HUGETLB)
+
/* This mask defines which mm->def_flags a process can inherit its parent */
#define VM_INIT_DEF_MASK VM_NOHUGEPAGE
@@ -378,15 +381,75 @@ extern unsigned int kobjsize(const void *objp);
*/
extern pgprot_t protection_map[16];
-#define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */
-#define FAULT_FLAG_MKWRITE 0x02 /* Fault was mkwrite of existing pte */
-#define FAULT_FLAG_ALLOW_RETRY 0x04 /* Retry fault if blocking */
-#define FAULT_FLAG_RETRY_NOWAIT 0x08 /* Don't drop mmap_sem and wait when retrying */
-#define FAULT_FLAG_KILLABLE 0x10 /* The fault task is in SIGKILL killable region */
-#define FAULT_FLAG_TRIED 0x20 /* Second try */
-#define FAULT_FLAG_USER 0x40 /* The fault originated in userspace */
-#define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */
-#define FAULT_FLAG_INSTRUCTION 0x100 /* The fault was during an instruction fetch */
+/**
+ * Fault flag definitions.
+ *
+ * @FAULT_FLAG_WRITE: Fault was a write fault.
+ * @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE.
+ * @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked.
+ * @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_sem and wait when retrying.
+ * @FAULT_FLAG_KILLABLE: The fault task is in SIGKILL killable region.
+ * @FAULT_FLAG_TRIED: The fault has been tried once.
+ * @FAULT_FLAG_USER: The fault originated in userspace.
+ * @FAULT_FLAG_REMOTE: The fault is not for current task/mm.
+ * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch.
+ * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals.
+ *
+ * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify
+ * whether we would allow page faults to retry by specifying these two
+ * fault flags correctly. Currently there can be three legal combinations:
+ *
+ * (a) ALLOW_RETRY and !TRIED: this means the page fault allows retry, and
+ * this is the first try
+ *
+ * (b) ALLOW_RETRY and TRIED: this means the page fault allows retry, and
+ * we've already tried at least once
+ *
+ * (c) !ALLOW_RETRY and !TRIED: this means the page fault does not allow retry
+ *
+ * The unlisted combination (!ALLOW_RETRY && TRIED) is illegal and should never
+ * be used. Note that page faults can be allowed to retry for multiple times,
+ * in which case we'll have an initial fault with flags (a) then later on
+ * continuous faults with flags (b). We should always try to detect pending
+ * signals before a retry to make sure the continuous page faults can still be
+ * interrupted if necessary.
+ */
+#define FAULT_FLAG_WRITE 0x01
+#define FAULT_FLAG_MKWRITE 0x02
+#define FAULT_FLAG_ALLOW_RETRY 0x04
+#define FAULT_FLAG_RETRY_NOWAIT 0x08
+#define FAULT_FLAG_KILLABLE 0x10
+#define FAULT_FLAG_TRIED 0x20
+#define FAULT_FLAG_USER 0x40
+#define FAULT_FLAG_REMOTE 0x80
+#define FAULT_FLAG_INSTRUCTION 0x100
+#define FAULT_FLAG_INTERRUPTIBLE 0x200
+
+/*
+ * The default fault flags that should be used by most of the
+ * arch-specific page fault handlers.
+ */
+#define FAULT_FLAG_DEFAULT (FAULT_FLAG_ALLOW_RETRY | \
+ FAULT_FLAG_KILLABLE | \
+ FAULT_FLAG_INTERRUPTIBLE)
+
+/**
+ * fault_flag_allow_retry_first - check ALLOW_RETRY the first time
+ *
+ * This is mostly used for places where we want to try to avoid taking
+ * the mmap_sem for too long a time when waiting for another condition
+ * to change, in which case we can try to be polite to release the
+ * mmap_sem in the first round to avoid potential starvation of other
+ * processes that would also want the mmap_sem.
+ *
+ * Return: true if the page fault allows retry and this is the first
+ * attempt of the fault handling; false otherwise.
+ */
+static inline bool fault_flag_allow_retry_first(unsigned int flags)
+{
+ return (flags & FAULT_FLAG_ALLOW_RETRY) &&
+ (!(flags & FAULT_FLAG_TRIED));
+}
#define FAULT_FLAG_TRACE \
{ FAULT_FLAG_WRITE, "WRITE" }, \
@@ -397,7 +460,8 @@ extern pgprot_t protection_map[16];
{ FAULT_FLAG_TRIED, "TRIED" }, \
{ FAULT_FLAG_USER, "USER" }, \
{ FAULT_FLAG_REMOTE, "REMOTE" }, \
- { FAULT_FLAG_INSTRUCTION, "INSTRUCTION" }
+ { FAULT_FLAG_INSTRUCTION, "INSTRUCTION" }, \
+ { FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" }
/*
* vm_fault is filled by the the pagefault handler and passed to the vma's
@@ -541,6 +605,30 @@ static inline bool vma_is_anonymous(struct vm_area_struct *vma)
return !vma->vm_ops;
}
+static inline bool vma_is_temporary_stack(struct vm_area_struct *vma)
+{
+ int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
+
+ if (!maybe_stack)
+ return false;
+
+ if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
+ VM_STACK_INCOMPLETE_SETUP)
+ return true;
+
+ return false;
+}
+
+static inline bool vma_is_foreign(struct vm_area_struct *vma)
+{
+ if (!current->mm)
+ return true;
+
+ if (current->mm != vma->vm_mm)
+ return true;
+
+ return false;
+}
#ifdef CONFIG_SHMEM
/*
* The vma_is_shmem is not inline because it is used only by slow
@@ -770,6 +858,24 @@ static inline unsigned int compound_order(struct page *page)
return page[1].compound_order;
}
+static inline bool hpage_pincount_available(struct page *page)
+{
+ /*
+ * Can the page->hpage_pinned_refcount field be used? That field is in
+ * the 3rd page of the compound page, so the smallest (2-page) compound
+ * pages cannot support it.
+ */
+ page = compound_head(page);
+ return PageCompound(page) && compound_order(page) > 1;
+}
+
+static inline int compound_pincount(struct page *page)
+{
+ VM_BUG_ON_PAGE(!hpage_pincount_available(page), page);
+ page = compound_head(page);
+ return atomic_read(compound_pincount_ptr(page));
+}
+
static inline void set_compound_order(struct page *page, unsigned int order)
{
page[1].compound_order = order;
@@ -1001,6 +1107,8 @@ static inline void get_page(struct page *page)
page_ref_inc(page);
}
+bool __must_check try_grab_page(struct page *page, unsigned int flags);
+
static inline __must_check bool try_get_page(struct page *page)
{
page = compound_head(page);
@@ -1029,29 +1137,87 @@ static inline void put_page(struct page *page)
__put_page(page);
}
-/**
- * unpin_user_page() - release a gup-pinned page
- * @page: pointer to page to be released
+/*
+ * GUP_PIN_COUNTING_BIAS, and the associated functions that use it, overload
+ * the page's refcount so that two separate items are tracked: the original page
+ * reference count, and also a new count of how many pin_user_pages() calls were
+ * made against the page. ("gup-pinned" is another term for the latter).
*
- * Pages that were pinned via pin_user_pages*() must be released via either
- * unpin_user_page(), or one of the unpin_user_pages*() routines. This is so
- * that eventually such pages can be separately tracked and uniquely handled. In
- * particular, interactions with RDMA and filesystems need special handling.
+ * With this scheme, pin_user_pages() becomes special: such pages are marked as
+ * distinct from normal pages. As such, the unpin_user_page() call (and its
+ * variants) must be used in order to release gup-pinned pages.
*
- * unpin_user_page() and put_page() are not interchangeable, despite this early
- * implementation that makes them look the same. unpin_user_page() calls must
- * be perfectly matched up with pin*() calls.
+ * Choice of value:
+ *
+ * By making GUP_PIN_COUNTING_BIAS a power of two, debugging of page reference
+ * counts with respect to pin_user_pages() and unpin_user_page() becomes
+ * simpler, due to the fact that adding an even power of two to the page
+ * refcount has the effect of using only the upper N bits, for the code that
+ * counts up using the bias value. This means that the lower bits are left for
+ * the exclusive use of the original code that increments and decrements by one
+ * (or at least, by much smaller values than the bias value).
+ *
+ * Of course, once the lower bits overflow into the upper bits (and this is
+ * OK, because subtraction recovers the original values), then visual inspection
+ * no longer suffices to directly view the separate counts. However, for normal
+ * applications that don't have huge page reference counts, this won't be an
+ * issue.
+ *
+ * Locking: the lockless algorithm described in page_cache_get_speculative()
+ * and page_cache_gup_pin_speculative() provides safe operation for
+ * get_user_pages and page_mkclean and other calls that race to set up page
+ * table entries.
*/
-static inline void unpin_user_page(struct page *page)
-{
- put_page(page);
-}
+#define GUP_PIN_COUNTING_BIAS (1U << 10)
+void unpin_user_page(struct page *page);
void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
bool make_dirty);
-
void unpin_user_pages(struct page **pages, unsigned long npages);
+/**
+ * page_maybe_dma_pinned() - report if a page is pinned for DMA.
+ *
+ * This function checks if a page has been pinned via a call to
+ * pin_user_pages*().
+ *
+ * For non-huge pages, the return value is partially fuzzy: false is not fuzzy,
+ * because it means "definitely not pinned for DMA", but true means "probably
+ * pinned for DMA, but possibly a false positive due to having at least
+ * GUP_PIN_COUNTING_BIAS worth of normal page references".
+ *
+ * False positives are OK, because: a) it's unlikely for a page to get that many
+ * refcounts, and b) all the callers of this routine are expected to be able to
+ * deal gracefully with a false positive.
+ *
+ * For huge pages, the result will be exactly correct. That's because we have
+ * more tracking data available: the 3rd struct page in the compound page is
+ * used to track the pincount (instead using of the GUP_PIN_COUNTING_BIAS
+ * scheme).
+ *
+ * For more information, please see Documentation/vm/pin_user_pages.rst.
+ *
+ * @page: pointer to page to be queried.
+ * @Return: True, if it is likely that the page has been "dma-pinned".
+ * False, if the page is definitely not dma-pinned.
+ */
+static inline bool page_maybe_dma_pinned(struct page *page)
+{
+ if (hpage_pincount_available(page))
+ return compound_pincount(page) > 0;
+
+ /*
+ * page_ref_count() is signed. If that refcount overflows, then
+ * page_ref_count() returns a negative value, and callers will avoid
+ * further incrementing the refcount.
+ *
+ * Here, for that overflow case, use the signed bit to count a little
+ * bit higher via unsigned math, and thus still get an accurate result.
+ */
+ return ((unsigned int)page_ref_count(compound_head(page))) >=
+ GUP_PIN_COUNTING_BIAS;
+}
+
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
#define SECTION_IN_PAGE_FLAGS
#endif
@@ -2364,26 +2530,7 @@ struct vm_unmapped_area_info {
unsigned long align_offset;
};
-extern unsigned long unmapped_area(struct vm_unmapped_area_info *info);
-extern unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info);
-
-/*
- * Search for an unmapped address range.
- *
- * We are looking for a range that:
- * - does not intersect with any VMA;
- * - is contained within the [low_limit, high_limit) interval;
- * - is at least the desired size.
- * - satisfies (begin_addr & align_mask) == (align_offset & align_mask)
- */
-static inline unsigned long
-vm_unmapped_area(struct vm_unmapped_area_info *info)
-{
- if (info->flags & VM_UNMAPPED_AREA_TOPDOWN)
- return unmapped_area_topdown(info);
- else
- return unmapped_area(info);
-}
+extern unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info);
/* truncate.c */
extern void truncate_inode_pages(struct address_space *, loff_t);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c28911c3afa8..dd555e6d23f3 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -137,7 +137,7 @@ struct page {
};
struct { /* Second tail page of compound page */
unsigned long _compound_pad_1; /* compound_head */
- unsigned long _compound_pad_2;
+ atomic_t hpage_pinned_refcount;
/* For both global and memcg */
struct list_head deferred_list;
};
@@ -226,6 +226,11 @@ static inline atomic_t *compound_mapcount_ptr(struct page *page)
return &page[1].compound_mapcount;
}
+static inline atomic_t *compound_pincount_ptr(struct page *page)
+{
+ return &page[2].hpage_pinned_refcount;
+}
+
/*
* Used for sizing the vmemmap region on some architectures
*/
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 462f6873905a..e84d448988b6 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -243,6 +243,8 @@ enum node_stat_item {
NR_DIRTIED, /* page dirtyings since bootup */
NR_WRITTEN, /* page writings since bootup */
NR_KERNEL_MISC_RECLAIMABLE, /* reclaimable non-slab kernel pages */
+ NR_FOLL_PIN_ACQUIRED, /* via: pin_user_page(), gup flag: FOLL_PIN */
+ NR_FOLL_PIN_RELEASED, /* pages returned via unpin_user_page() */
NR_VM_NODE_STAT_ITEMS
};
@@ -1372,7 +1374,7 @@ static inline int pfn_valid(unsigned long pfn)
}
#endif
-static inline int pfn_present(unsigned long pfn)
+static inline int pfn_in_present_section(unsigned long pfn)
{
if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
return 0;
@@ -1409,7 +1411,7 @@ void sparse_init(void);
#else
#define sparse_init() do {} while (0)
#define sparse_index_init(_sec, _nid) do {} while (0)
-#define pfn_present pfn_valid
+#define pfn_in_present_section pfn_valid
#define subsection_map_init(_pfn, _nr_pages) do {} while (0)
#endif /* CONFIG_SPARSEMEM */
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 14d14beb1f7f..d27701199a4d 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -102,6 +102,15 @@ static inline void page_ref_sub(struct page *page, int nr)
__page_ref_mod(page, -nr);
}
+static inline int page_ref_sub_return(struct page *page, int nr)
+{
+ int ret = atomic_sub_return(nr, &page->_refcount);
+
+ if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_return))
+ __page_ref_mod_and_return(page, -nr, ret);
+ return ret;
+}
+
static inline void page_ref_inc(struct page *page)
{
atomic_inc(&page->_refcount);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index ccb14b6a16b5..f56282491a48 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -33,8 +33,8 @@ enum mapping_flags {
/**
* mapping_set_error - record a writeback error in the address_space
- * @mapping - the mapping in which an error should be set
- * @error - the error to set in the mapping
+ * @mapping: the mapping in which an error should be set
+ * @error: the error to set in the mapping
*
* When writeback fails in some way, we must record that error so that
* userspace can be informed when fsync and the like are called. We endeavor
@@ -70,11 +70,9 @@ static inline void mapping_clear_unevictable(struct address_space *mapping)
clear_bit(AS_UNEVICTABLE, &mapping->flags);
}
-static inline int mapping_unevictable(struct address_space *mapping)
+static inline bool mapping_unevictable(struct address_space *mapping)
{
- if (mapping)
- return test_bit(AS_UNEVICTABLE, &mapping->flags);
- return !!mapping;
+ return mapping && test_bit(AS_UNEVICTABLE, &mapping->flags);
}
static inline void mapping_set_exiting(struct address_space *mapping)
@@ -305,9 +303,9 @@ static inline struct page *find_lock_page(struct address_space *mapping,
* atomic allocation!
*/
static inline struct page *find_or_create_page(struct address_space *mapping,
- pgoff_t offset, gfp_t gfp_mask)
+ pgoff_t index, gfp_t gfp_mask)
{
- return pagecache_get_page(mapping, offset,
+ return pagecache_get_page(mapping, index,
FGP_LOCK|FGP_ACCESSED|FGP_CREAT,
gfp_mask);
}
@@ -333,14 +331,19 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
mapping_gfp_mask(mapping));
}
-static inline struct page *find_subpage(struct page *page, pgoff_t offset)
+/*
+ * Given the page we found in the page cache, return the page corresponding
+ * to this index in the file
+ */
+static inline struct page *find_subpage(struct page *head, pgoff_t index)
{
- if (PageHuge(page))
- return page;
+ /* HugeTLBfs wants the head page regardless */
+ if (PageHuge(head))
+ return head;
- VM_BUG_ON_PAGE(PageTail(page), page);
+ VM_BUG_ON_PAGE(PageTail(head), head);
- return page + (offset & (compound_nr(page) - 1));
+ return head + (index & (compound_nr(head) - 1));
}
struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index a29df79540ce..3e5b090c16d4 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -10,6 +10,8 @@
#include <linux/cred.h>
#include <linux/refcount.h>
#include <linux/posix-timers.h>
+#include <linux/mm_types.h>
+#include <asm/ptrace.h>
/*
* Types defining task->signal and task->sighand and APIs using them:
@@ -377,6 +379,20 @@ static inline int signal_pending_state(long state, struct task_struct *p)
}
/*
+ * This should only be used in fault handlers to decide whether we
+ * should stop the current fault routine to handle the signals
+ * instead, especially with the case where we've got interrupted with
+ * a VM_FAULT_RETRY.
+ */
+static inline bool fault_signal_pending(vm_fault_t fault_flags,
+ struct pt_regs *regs)
+{
+ return unlikely((fault_flags & VM_FAULT_RETRY) &&
+ (fatal_signal_pending(current) ||
+ (user_mode(regs) && signal_pending(current))));
+}
+
+/*
* Reevaluate whether the task has signals pending delivery.
* Wake the task if so.
* This is required every time the blocked sigset_t changes.
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 1e99f7ac1d7e..b835d8dbea0e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -374,7 +374,6 @@ extern int sysctl_min_slab_ratio;
#define node_reclaim_mode 0
#endif
-extern int page_evictable(struct page *page);
extern void check_move_unevictable_pages(struct pagevec *pvec);
extern int kswapd_run(int nid);
diff --git a/include/linux/topology.h b/include/linux/topology.h
index eb2fe6edd73c..608fa4aadf0e 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -130,20 +130,11 @@ static inline int numa_node_id(void)
* Use the accessor functions set_numa_mem(), numa_mem_id() and cpu_to_mem().
*/
DECLARE_PER_CPU(int, _numa_mem_);
-extern int _node_numa_mem_[MAX_NUMNODES];
#ifndef set_numa_mem
static inline void set_numa_mem(int node)
{
this_cpu_write(_numa_mem_, node);
- _node_numa_mem_[numa_node_id()] = node;
-}
-#endif
-
-#ifndef node_to_mem_node
-static inline int node_to_mem_node(int node)
-{
- return _node_numa_mem_[node];
}
#endif
@@ -166,7 +157,6 @@ static inline int cpu_to_mem(int cpu)
static inline void set_cpu_numa_mem(int cpu, int node)
{
per_cpu(_numa_mem_, cpu) = node;
- _node_numa_mem_[cpu_to_node(cpu)] = node;
}
#endif
@@ -180,13 +170,6 @@ static inline int numa_mem_id(void)
}
#endif
-#ifndef node_to_mem_node
-static inline int node_to_mem_node(int node)
-{
- return node;
-}
-#endif
-
#ifndef cpu_to_mem
static inline int cpu_to_mem(int cpu)
{
diff --git a/include/trace/events/mmap.h b/include/trace/events/mmap.h
new file mode 100644
index 000000000000..4661f7ba07c0
--- /dev/null
+++ b/include/trace/events/mmap.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mmap
+
+#if !defined(_TRACE_MMAP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MMAP_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(vm_unmapped_area,
+
+ TP_PROTO(unsigned long addr, struct vm_unmapped_area_info *info),
+
+ TP_ARGS(addr, info),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, addr)
+ __field(unsigned long, total_vm)
+ __field(unsigned long, flags)
+ __field(unsigned long, length)
+ __field(unsigned long, low_limit)
+ __field(unsigned long, high_limit)
+ __field(unsigned long, align_mask)
+ __field(unsigned long, align_offset)
+ ),
+
+ TP_fast_assign(
+ __entry->addr = addr;
+ __entry->total_vm = current->mm->total_vm;
+ __entry->flags = info->flags;
+ __entry->length = info->length;
+ __entry->low_limit = info->low_limit;
+ __entry->high_limit = info->high_limit;
+ __entry->align_mask = info->align_mask;
+ __entry->align_offset = info->align_offset;
+ ),
+
+ TP_printk("addr=0x%lx err=%ld total_vm=0x%lx flags=0x%lx len=0x%lx lo=0x%lx hi=0x%lx mask=0x%lx ofs=0x%lx\n",
+ IS_ERR_VALUE(__entry->addr) ? 0 : __entry->addr,
+ IS_ERR_VALUE(__entry->addr) ? __entry->addr : 0,
+ __entry->total_vm, __entry->flags, __entry->length,
+ __entry->low_limit, __entry->high_limit, __entry->align_mask,
+ __entry->align_offset)
+);
+#endif
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h
index fc1a64c3447b..923cc162609c 100644
--- a/include/uapi/linux/mman.h
+++ b/include/uapi/linux/mman.h
@@ -5,8 +5,9 @@
#include <asm/mman.h>
#include <asm-generic/hugetlb_encode.h>
-#define MREMAP_MAYMOVE 1
-#define MREMAP_FIXED 2
+#define MREMAP_MAYMOVE 1
+#define MREMAP_FIXED 2
+#define MREMAP_DONTUNMAP 4
#define OVERCOMMIT_GUESS 0
#define OVERCOMMIT_ALWAYS 1