diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-04-02 13:55:34 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-04-02 13:55:34 -0700 |
commit | 6cad420cc695867b4ca710bac21fde21a4102e4b (patch) | |
tree | 890d42abc1e82c2cf5cef583584f88ca70116ce9 /include | |
parent | 7be97138e7276c71cc9ad1752dcb502d28f4400d (diff) | |
parent | 77d6b9094819ba55353de0ef92957f3f54f2c36c (diff) | |
download | lwn-6cad420cc695867b4ca710bac21fde21a4102e4b.tar.gz lwn-6cad420cc695867b4ca710bac21fde21a4102e4b.zip |
Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton:
"A large amount of MM, plenty more to come.
Subsystems affected by this patch series:
- tools
- kthread
- kbuild
- scripts
- ocfs2
- vfs
- mm: slub, kmemleak, pagecache, gup, swap, memcg, pagemap, mremap,
sparsemem, kasan, pagealloc, vmscan, compaction, mempolicy,
hugetlbfs, hugetlb"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (155 commits)
include/linux/huge_mm.h: check PageTail in hpage_nr_pages even when !THP
mm/hugetlb: fix build failure with HUGETLB_PAGE but not HUGEBTLBFS
selftests/vm: fix map_hugetlb length used for testing read and write
mm/hugetlb: remove unnecessary memory fetch in PageHeadHuge()
mm/hugetlb.c: clean code by removing unnecessary initialization
hugetlb_cgroup: add hugetlb_cgroup reservation docs
hugetlb_cgroup: add hugetlb_cgroup reservation tests
hugetlb: support file_region coalescing again
hugetlb_cgroup: support noreserve mappings
hugetlb_cgroup: add accounting for shared mappings
hugetlb: disable region_add file_region coalescing
hugetlb_cgroup: add reservation accounting for private mappings
mm/hugetlb_cgroup: fix hugetlb_cgroup migration
hugetlb_cgroup: add interface for charge/uncharge hugetlb reservations
hugetlb_cgroup: add hugetlb_cgroup reservation counter
hugetlbfs: Use i_mmap_rwsem to address page fault/truncate race
hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization
mm/memblock.c: remove redundant assignment to variable max_addr
mm: mempolicy: require at least one nodeid for MPOL_PREFERRED
mm: mempolicy: use VM_BUG_ON_VMA in queue_pages_test_walk()
...
Diffstat (limited to 'include')
-rw-r--r-- | include/asm-generic/Kbuild | 52 | ||||
-rw-r--r-- | include/linux/cgroup-defs.h | 5 | ||||
-rw-r--r-- | include/linux/fs.h | 5 | ||||
-rw-r--r-- | include/linux/gfp.h | 6 | ||||
-rw-r--r-- | include/linux/huge_mm.h | 10 | ||||
-rw-r--r-- | include/linux/hugetlb.h | 76 | ||||
-rw-r--r-- | include/linux/hugetlb_cgroup.h | 169 | ||||
-rw-r--r-- | include/linux/kasan.h | 2 | ||||
-rw-r--r-- | include/linux/kthread.h | 3 | ||||
-rw-r--r-- | include/linux/memcontrol.h | 42 | ||||
-rw-r--r-- | include/linux/mempolicy.h | 29 | ||||
-rw-r--r-- | include/linux/mm.h | 239 | ||||
-rw-r--r-- | include/linux/mm_types.h | 7 | ||||
-rw-r--r-- | include/linux/mmzone.h | 6 | ||||
-rw-r--r-- | include/linux/page_ref.h | 9 | ||||
-rw-r--r-- | include/linux/pagemap.h | 29 | ||||
-rw-r--r-- | include/linux/sched/signal.h | 16 | ||||
-rw-r--r-- | include/linux/swap.h | 1 | ||||
-rw-r--r-- | include/linux/topology.h | 17 | ||||
-rw-r--r-- | include/trace/events/mmap.h | 48 | ||||
-rw-r--r-- | include/uapi/linux/mman.h | 5 |
21 files changed, 609 insertions, 167 deletions
diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index cd17d50697cc..36341dfded70 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -4,6 +4,58 @@ # (This file is not included when SRCARCH=um since UML borrows several # asm headers from the host architecutre.) +mandatory-y += atomic.h +mandatory-y += barrier.h +mandatory-y += bitops.h +mandatory-y += bug.h +mandatory-y += bugs.h +mandatory-y += cacheflush.h +mandatory-y += checksum.h +mandatory-y += compat.h +mandatory-y += current.h +mandatory-y += delay.h +mandatory-y += device.h +mandatory-y += div64.h mandatory-y += dma-contiguous.h +mandatory-y += dma-mapping.h +mandatory-y += dma.h +mandatory-y += emergency-restart.h +mandatory-y += exec.h +mandatory-y += fb.h +mandatory-y += ftrace.h +mandatory-y += futex.h +mandatory-y += hardirq.h +mandatory-y += hw_irq.h +mandatory-y += io.h +mandatory-y += irq.h +mandatory-y += irq_regs.h +mandatory-y += irq_work.h +mandatory-y += kdebug.h +mandatory-y += kmap_types.h +mandatory-y += kprobes.h +mandatory-y += linkage.h +mandatory-y += local.h +mandatory-y += mm-arch-hooks.h +mandatory-y += mmiowb.h +mandatory-y += mmu.h +mandatory-y += mmu_context.h +mandatory-y += module.h mandatory-y += msi.h +mandatory-y += pci.h +mandatory-y += percpu.h +mandatory-y += pgalloc.h +mandatory-y += preempt.h +mandatory-y += sections.h +mandatory-y += serial.h +mandatory-y += shmparam.h mandatory-y += simd.h +mandatory-y += switch_to.h +mandatory-y += timex.h +mandatory-y += tlbflush.h +mandatory-y += topology.h +mandatory-y += trace_clock.h +mandatory-y += uaccess.h +mandatory-y += unaligned.h +mandatory-y += vga.h +mandatory-y += word-at-a-time.h +mandatory-y += xor.h diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 63097cb243cb..e1fafed22db1 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -94,6 +94,11 @@ enum { * Enable legacy local memory.events. */ CGRP_ROOT_MEMORY_LOCAL_EVENTS = (1 << 5), + + /* + * Enable recursive subtree protection + */ + CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 6), }; /* cftype->flags */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 3d69de600494..f81c822f4d89 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -526,6 +526,11 @@ static inline void i_mmap_lock_write(struct address_space *mapping) down_write(&mapping->i_mmap_rwsem); } +static inline int i_mmap_trylock_write(struct address_space *mapping) +{ + return down_write_trylock(&mapping->i_mmap_rwsem); +} + static inline void i_mmap_unlock_write(struct address_space *mapping) { up_write(&mapping->i_mmap_rwsem); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index e5b817cb86e7..be2754841369 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -485,6 +485,12 @@ static inline void arch_free_page(struct page *page, int order) { } #ifndef HAVE_ARCH_ALLOC_PAGE static inline void arch_alloc_page(struct page *page, int order) { } #endif +#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE +static inline int arch_make_page_accessible(struct page *page) +{ + return 0; +} +#endif struct page * __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 5aca3d1bdb32..680a0d9a9721 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -87,8 +87,6 @@ extern struct kobj_attribute shmem_enabled_attr; #define HPAGE_PUD_SIZE ((1UL) << HPAGE_PUD_SHIFT) #define HPAGE_PUD_MASK (~(HPAGE_PUD_SIZE - 1)) -extern bool is_vma_temporary_stack(struct vm_area_struct *vma); - extern unsigned long transparent_hugepage_flags; /* @@ -100,7 +98,7 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) if (vma->vm_flags & VM_NOHUGEPAGE) return false; - if (is_vma_temporary_stack(vma)) + if (vma_is_temporary_stack(vma)) return false; if (test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)) @@ -289,7 +287,11 @@ static inline struct list_head *page_deferred_list(struct page *page) #define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; }) #define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; }) -#define hpage_nr_pages(x) 1 +static inline int hpage_nr_pages(struct page *page) +{ + VM_BUG_ON_PAGE(PageTail(page), page); + return 1; +} static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) { diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 1e897e4168ac..5ea05879a0a9 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -46,7 +46,52 @@ struct resv_map { long adds_in_progress; struct list_head region_cache; long region_cache_count; +#ifdef CONFIG_CGROUP_HUGETLB + /* + * On private mappings, the counter to uncharge reservations is stored + * here. If these fields are 0, then either the mapping is shared, or + * cgroup accounting is disabled for this resv_map. + */ + struct page_counter *reservation_counter; + unsigned long pages_per_hpage; + struct cgroup_subsys_state *css; +#endif +}; + +/* + * Region tracking -- allows tracking of reservations and instantiated pages + * across the pages in a mapping. + * + * The region data structures are embedded into a resv_map and protected + * by a resv_map's lock. The set of regions within the resv_map represent + * reservations for huge pages, or huge pages that have already been + * instantiated within the map. The from and to elements are huge page + * indicies into the associated mapping. from indicates the starting index + * of the region. to represents the first index past the end of the region. + * + * For example, a file region structure with from == 0 and to == 4 represents + * four huge pages in a mapping. It is important to note that the to element + * represents the first element past the end of the region. This is used in + * arithmetic as 4(to) - 0(from) = 4 huge pages in the region. + * + * Interval notation of the form [from, to) will be used to indicate that + * the endpoint from is inclusive and to is exclusive. + */ +struct file_region { + struct list_head link; + long from; + long to; +#ifdef CONFIG_CGROUP_HUGETLB + /* + * On shared mappings, each reserved region appears as a struct + * file_region in resv_map. These fields hold the info needed to + * uncharge each reservation. + */ + struct page_counter *reservation_counter; + struct cgroup_subsys_state *css; +#endif }; + extern struct resv_map *resv_map_alloc(void); void resv_map_release(struct kref *ref); @@ -109,6 +154,8 @@ u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx); pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); +struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage); + extern int sysctl_hugetlb_shm_group; extern struct list_head huge_boot_pages; @@ -151,6 +198,12 @@ static inline unsigned long hugetlb_total_pages(void) return 0; } +static inline struct address_space *hugetlb_page_mapping_lock_write( + struct page *hpage) +{ + return NULL; +} + static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) { @@ -390,7 +443,10 @@ static inline bool is_file_hugepages(struct file *file) return is_file_shm_hugepages(file); } - +static inline struct hstate *hstate_inode(struct inode *i) +{ + return HUGETLBFS_SB(i->i_sb)->hstate; +} #else /* !CONFIG_HUGETLBFS */ #define is_file_hugepages(file) false @@ -402,6 +458,10 @@ hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag, return ERR_PTR(-ENOSYS); } +static inline struct hstate *hstate_inode(struct inode *i) +{ + return NULL; +} #endif /* !CONFIG_HUGETLBFS */ #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA @@ -432,8 +492,8 @@ struct hstate { unsigned int surplus_huge_pages_node[MAX_NUMNODES]; #ifdef CONFIG_CGROUP_HUGETLB /* cgroup control files */ - struct cftype cgroup_files_dfl[5]; - struct cftype cgroup_files_legacy[5]; + struct cftype cgroup_files_dfl[7]; + struct cftype cgroup_files_legacy[9]; #endif char name[HSTATE_NAME_LEN]; }; @@ -472,11 +532,6 @@ extern unsigned int default_hstate_idx; #define default_hstate (hstates[default_hstate_idx]) -static inline struct hstate *hstate_inode(struct inode *i) -{ - return HUGETLBFS_SB(i->i_sb)->hstate; -} - static inline struct hstate *hstate_file(struct file *f) { return hstate_inode(file_inode(f)); @@ -729,11 +784,6 @@ static inline struct hstate *hstate_vma(struct vm_area_struct *vma) return NULL; } -static inline struct hstate *hstate_inode(struct inode *i) -{ - return NULL; -} - static inline struct hstate *page_hstate(struct page *page) { return NULL; diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index 063962f6dfc6..2ad6e92f124a 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -18,34 +18,96 @@ #include <linux/mmdebug.h> struct hugetlb_cgroup; +struct resv_map; +struct file_region; + /* * Minimum page order trackable by hugetlb cgroup. - * At least 3 pages are necessary for all the tracking information. + * At least 4 pages are necessary for all the tracking information. + * The second tail page (hpage[2]) is the fault usage cgroup. + * The third tail page (hpage[3]) is the reservation usage cgroup. */ #define HUGETLB_CGROUP_MIN_ORDER 2 #ifdef CONFIG_CGROUP_HUGETLB +enum hugetlb_memory_event { + HUGETLB_MAX, + HUGETLB_NR_MEMORY_EVENTS, +}; -static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) +struct hugetlb_cgroup { + struct cgroup_subsys_state css; + + /* + * the counter to account for hugepages from hugetlb. + */ + struct page_counter hugepage[HUGE_MAX_HSTATE]; + + /* + * the counter to account for hugepage reservations from hugetlb. + */ + struct page_counter rsvd_hugepage[HUGE_MAX_HSTATE]; + + atomic_long_t events[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS]; + atomic_long_t events_local[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS]; + + /* Handle for "hugetlb.events" */ + struct cgroup_file events_file[HUGE_MAX_HSTATE]; + + /* Handle for "hugetlb.events.local" */ + struct cgroup_file events_local_file[HUGE_MAX_HSTATE]; +}; + +static inline struct hugetlb_cgroup * +__hugetlb_cgroup_from_page(struct page *page, bool rsvd) { VM_BUG_ON_PAGE(!PageHuge(page), page); if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) return NULL; - return (struct hugetlb_cgroup *)page[2].private; + if (rsvd) + return (struct hugetlb_cgroup *)page[3].private; + else + return (struct hugetlb_cgroup *)page[2].private; +} + +static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) +{ + return __hugetlb_cgroup_from_page(page, false); +} + +static inline struct hugetlb_cgroup * +hugetlb_cgroup_from_page_rsvd(struct page *page) +{ + return __hugetlb_cgroup_from_page(page, true); } -static inline -int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg) +static inline int __set_hugetlb_cgroup(struct page *page, + struct hugetlb_cgroup *h_cg, bool rsvd) { VM_BUG_ON_PAGE(!PageHuge(page), page); if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) return -1; - page[2].private = (unsigned long)h_cg; + if (rsvd) + page[3].private = (unsigned long)h_cg; + else + page[2].private = (unsigned long)h_cg; return 0; } +static inline int set_hugetlb_cgroup(struct page *page, + struct hugetlb_cgroup *h_cg) +{ + return __set_hugetlb_cgroup(page, h_cg, false); +} + +static inline int set_hugetlb_cgroup_rsvd(struct page *page, + struct hugetlb_cgroup *h_cg) +{ + return __set_hugetlb_cgroup(page, h_cg, true); +} + static inline bool hugetlb_cgroup_disabled(void) { return !cgroup_subsys_enabled(hugetlb_cgrp_subsys); @@ -53,25 +115,67 @@ static inline bool hugetlb_cgroup_disabled(void) extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, struct hugetlb_cgroup **ptr); +extern int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages, + struct hugetlb_cgroup **ptr); extern void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg, struct page *page); +extern void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages, + struct hugetlb_cgroup *h_cg, + struct page *page); extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, struct page *page); +extern void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages, + struct page *page); + extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg); +extern void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages, + struct hugetlb_cgroup *h_cg); +extern void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, + unsigned long start, + unsigned long end); + +extern void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, + struct file_region *rg, + unsigned long nr_pages); + extern void hugetlb_cgroup_file_init(void) __init; extern void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage); #else +static inline void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, + struct file_region *rg, + unsigned long nr_pages) +{ +} + static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) { return NULL; } -static inline -int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg) +static inline struct hugetlb_cgroup * +hugetlb_cgroup_from_page_resv(struct page *page) +{ + return NULL; +} + +static inline struct hugetlb_cgroup * +hugetlb_cgroup_from_page_rsvd(struct page *page) +{ + return NULL; +} + +static inline int set_hugetlb_cgroup(struct page *page, + struct hugetlb_cgroup *h_cg) +{ + return 0; +} + +static inline int set_hugetlb_cgroup_rsvd(struct page *page, + struct hugetlb_cgroup *h_cg) { return 0; } @@ -81,28 +185,57 @@ static inline bool hugetlb_cgroup_disabled(void) return true; } -static inline int -hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, - struct hugetlb_cgroup **ptr) +static inline int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, + struct hugetlb_cgroup **ptr) { return 0; } -static inline void -hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, - struct hugetlb_cgroup *h_cg, - struct page *page) +static inline int hugetlb_cgroup_charge_cgroup_rsvd(int idx, + unsigned long nr_pages, + struct hugetlb_cgroup **ptr) +{ + return 0; +} + +static inline void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, + struct hugetlb_cgroup *h_cg, + struct page *page) { } static inline void -hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, struct page *page) +hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages, + struct hugetlb_cgroup *h_cg, + struct page *page) +{ +} + +static inline void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, + struct page *page) +{ +} + +static inline void hugetlb_cgroup_uncharge_page_rsvd(int idx, + unsigned long nr_pages, + struct page *page) +{ +} +static inline void hugetlb_cgroup_uncharge_cgroup(int idx, + unsigned long nr_pages, + struct hugetlb_cgroup *h_cg) { } static inline void -hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, - struct hugetlb_cgroup *h_cg) +hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages, + struct hugetlb_cgroup *h_cg) +{ +} + +static inline void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, + unsigned long start, + unsigned long end) { } diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 5cde9e7c2664..31314ca7c635 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -190,7 +190,7 @@ void kasan_init_tags(void); void *kasan_reset_tag(const void *addr); -void kasan_report(unsigned long addr, size_t size, +bool kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip); #else /* CONFIG_KASAN_SW_TAGS */ diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 0f9da966934e..8bbcaad7ef0f 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -165,7 +165,8 @@ extern void __kthread_init_worker(struct kthread_worker *worker, do { \ kthread_init_work(&(dwork)->work, (fn)); \ timer_setup(&(dwork)->timer, \ - kthread_delayed_work_timer_fn, 0); \ + kthread_delayed_work_timer_fn, \ + TIMER_IRQSAFE); \ } while (0) int kthread_worker_fn(void *worker_ptr); diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e9ba01336d4e..1b4150ff64be 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1367,12 +1367,11 @@ struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep); void memcg_kmem_put_cache(struct kmem_cache *cachep); #ifdef CONFIG_MEMCG_KMEM -int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order); -void __memcg_kmem_uncharge(struct page *page, int order); -int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order, - struct mem_cgroup *memcg); -void __memcg_kmem_uncharge_memcg(struct mem_cgroup *memcg, - unsigned int nr_pages); +int __memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp, + unsigned int nr_pages); +void __memcg_kmem_uncharge(struct mem_cgroup *memcg, unsigned int nr_pages); +int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order); +void __memcg_kmem_uncharge_page(struct page *page, int order); extern struct static_key_false memcg_kmem_enabled_key; extern struct workqueue_struct *memcg_kmem_cache_wq; @@ -1394,32 +1393,33 @@ static inline bool memcg_kmem_enabled(void) return static_branch_unlikely(&memcg_kmem_enabled_key); } -static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order) +static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp, + int order) { if (memcg_kmem_enabled()) - return __memcg_kmem_charge(page, gfp, order); + return __memcg_kmem_charge_page(page, gfp, order); return 0; } -static inline void memcg_kmem_uncharge(struct page *page, int order) +static inline void memcg_kmem_uncharge_page(struct page *page, int order) { if (memcg_kmem_enabled()) - __memcg_kmem_uncharge(page, order); + __memcg_kmem_uncharge_page(page, order); } -static inline int memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, - int order, struct mem_cgroup *memcg) +static inline int memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp, + unsigned int nr_pages) { if (memcg_kmem_enabled()) - return __memcg_kmem_charge_memcg(page, gfp, order, memcg); + return __memcg_kmem_charge(memcg, gfp, nr_pages); return 0; } -static inline void memcg_kmem_uncharge_memcg(struct page *page, int order, - struct mem_cgroup *memcg) +static inline void memcg_kmem_uncharge(struct mem_cgroup *memcg, + unsigned int nr_pages) { if (memcg_kmem_enabled()) - __memcg_kmem_uncharge_memcg(memcg, 1 << order); + __memcg_kmem_uncharge(memcg, nr_pages); } /* @@ -1436,21 +1436,23 @@ struct mem_cgroup *mem_cgroup_from_obj(void *p); #else -static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order) +static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp, + int order) { return 0; } -static inline void memcg_kmem_uncharge(struct page *page, int order) +static inline void memcg_kmem_uncharge_page(struct page *page, int order) { } -static inline int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order) +static inline int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, + int order) { return 0; } -static inline void __memcg_kmem_uncharge(struct page *page, int order) +static inline void __memcg_kmem_uncharge_page(struct page *page, int order) { } diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 5228c62af416..8165278c348a 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -173,34 +173,7 @@ extern int mpol_parse_str(char *str, struct mempolicy **mpol); extern void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol); /* Check if a vma is migratable */ -static inline bool vma_migratable(struct vm_area_struct *vma) -{ - if (vma->vm_flags & (VM_IO | VM_PFNMAP)) - return false; - - /* - * DAX device mappings require predictable access latency, so avoid - * incurring periodic faults. - */ - if (vma_is_dax(vma)) - return false; - -#ifndef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION - if (vma->vm_flags & VM_HUGETLB) - return false; -#endif - - /* - * Migration allocates pages in the highest zone. If we cannot - * do so then migration (at least from node to node) is not - * possible. - */ - if (vma->vm_file && - gfp_zone(mapping_gfp_mask(vma->vm_file->f_mapping)) - < policy_zone) - return false; - return true; -} +extern bool vma_migratable(struct vm_area_struct *vma); extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long); extern void mpol_put_task_policy(struct task_struct *); diff --git a/include/linux/mm.h b/include/linux/mm.h index c54fb96cb1e6..937bf719c329 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -27,6 +27,7 @@ #include <linux/memremap.h> #include <linux/overflow.h> #include <linux/sizes.h> +#include <linux/sched.h> struct mempolicy; struct anon_vma; @@ -356,10 +357,12 @@ extern unsigned int kobjsize(const void *objp); /* * Special vmas that are non-mergable, non-mlock()able. - * Note: mm/huge_memory.c VM_NO_THP depends on this definition. */ #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP) +/* This mask prevents VMA from being scanned with khugepaged */ +#define VM_NO_KHUGEPAGED (VM_SPECIAL | VM_HUGETLB) + /* This mask defines which mm->def_flags a process can inherit its parent */ #define VM_INIT_DEF_MASK VM_NOHUGEPAGE @@ -378,15 +381,75 @@ extern unsigned int kobjsize(const void *objp); */ extern pgprot_t protection_map[16]; -#define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ -#define FAULT_FLAG_MKWRITE 0x02 /* Fault was mkwrite of existing pte */ -#define FAULT_FLAG_ALLOW_RETRY 0x04 /* Retry fault if blocking */ -#define FAULT_FLAG_RETRY_NOWAIT 0x08 /* Don't drop mmap_sem and wait when retrying */ -#define FAULT_FLAG_KILLABLE 0x10 /* The fault task is in SIGKILL killable region */ -#define FAULT_FLAG_TRIED 0x20 /* Second try */ -#define FAULT_FLAG_USER 0x40 /* The fault originated in userspace */ -#define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */ -#define FAULT_FLAG_INSTRUCTION 0x100 /* The fault was during an instruction fetch */ +/** + * Fault flag definitions. + * + * @FAULT_FLAG_WRITE: Fault was a write fault. + * @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE. + * @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked. + * @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_sem and wait when retrying. + * @FAULT_FLAG_KILLABLE: The fault task is in SIGKILL killable region. + * @FAULT_FLAG_TRIED: The fault has been tried once. + * @FAULT_FLAG_USER: The fault originated in userspace. + * @FAULT_FLAG_REMOTE: The fault is not for current task/mm. + * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch. + * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals. + * + * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify + * whether we would allow page faults to retry by specifying these two + * fault flags correctly. Currently there can be three legal combinations: + * + * (a) ALLOW_RETRY and !TRIED: this means the page fault allows retry, and + * this is the first try + * + * (b) ALLOW_RETRY and TRIED: this means the page fault allows retry, and + * we've already tried at least once + * + * (c) !ALLOW_RETRY and !TRIED: this means the page fault does not allow retry + * + * The unlisted combination (!ALLOW_RETRY && TRIED) is illegal and should never + * be used. Note that page faults can be allowed to retry for multiple times, + * in which case we'll have an initial fault with flags (a) then later on + * continuous faults with flags (b). We should always try to detect pending + * signals before a retry to make sure the continuous page faults can still be + * interrupted if necessary. + */ +#define FAULT_FLAG_WRITE 0x01 +#define FAULT_FLAG_MKWRITE 0x02 +#define FAULT_FLAG_ALLOW_RETRY 0x04 +#define FAULT_FLAG_RETRY_NOWAIT 0x08 +#define FAULT_FLAG_KILLABLE 0x10 +#define FAULT_FLAG_TRIED 0x20 +#define FAULT_FLAG_USER 0x40 +#define FAULT_FLAG_REMOTE 0x80 +#define FAULT_FLAG_INSTRUCTION 0x100 +#define FAULT_FLAG_INTERRUPTIBLE 0x200 + +/* + * The default fault flags that should be used by most of the + * arch-specific page fault handlers. + */ +#define FAULT_FLAG_DEFAULT (FAULT_FLAG_ALLOW_RETRY | \ + FAULT_FLAG_KILLABLE | \ + FAULT_FLAG_INTERRUPTIBLE) + +/** + * fault_flag_allow_retry_first - check ALLOW_RETRY the first time + * + * This is mostly used for places where we want to try to avoid taking + * the mmap_sem for too long a time when waiting for another condition + * to change, in which case we can try to be polite to release the + * mmap_sem in the first round to avoid potential starvation of other + * processes that would also want the mmap_sem. + * + * Return: true if the page fault allows retry and this is the first + * attempt of the fault handling; false otherwise. + */ +static inline bool fault_flag_allow_retry_first(unsigned int flags) +{ + return (flags & FAULT_FLAG_ALLOW_RETRY) && + (!(flags & FAULT_FLAG_TRIED)); +} #define FAULT_FLAG_TRACE \ { FAULT_FLAG_WRITE, "WRITE" }, \ @@ -397,7 +460,8 @@ extern pgprot_t protection_map[16]; { FAULT_FLAG_TRIED, "TRIED" }, \ { FAULT_FLAG_USER, "USER" }, \ { FAULT_FLAG_REMOTE, "REMOTE" }, \ - { FAULT_FLAG_INSTRUCTION, "INSTRUCTION" } + { FAULT_FLAG_INSTRUCTION, "INSTRUCTION" }, \ + { FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" } /* * vm_fault is filled by the the pagefault handler and passed to the vma's @@ -541,6 +605,30 @@ static inline bool vma_is_anonymous(struct vm_area_struct *vma) return !vma->vm_ops; } +static inline bool vma_is_temporary_stack(struct vm_area_struct *vma) +{ + int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP); + + if (!maybe_stack) + return false; + + if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) == + VM_STACK_INCOMPLETE_SETUP) + return true; + + return false; +} + +static inline bool vma_is_foreign(struct vm_area_struct *vma) +{ + if (!current->mm) + return true; + + if (current->mm != vma->vm_mm) + return true; + + return false; +} #ifdef CONFIG_SHMEM /* * The vma_is_shmem is not inline because it is used only by slow @@ -770,6 +858,24 @@ static inline unsigned int compound_order(struct page *page) return page[1].compound_order; } +static inline bool hpage_pincount_available(struct page *page) +{ + /* + * Can the page->hpage_pinned_refcount field be used? That field is in + * the 3rd page of the compound page, so the smallest (2-page) compound + * pages cannot support it. + */ + page = compound_head(page); + return PageCompound(page) && compound_order(page) > 1; +} + +static inline int compound_pincount(struct page *page) +{ + VM_BUG_ON_PAGE(!hpage_pincount_available(page), page); + page = compound_head(page); + return atomic_read(compound_pincount_ptr(page)); +} + static inline void set_compound_order(struct page *page, unsigned int order) { page[1].compound_order = order; @@ -1001,6 +1107,8 @@ static inline void get_page(struct page *page) page_ref_inc(page); } +bool __must_check try_grab_page(struct page *page, unsigned int flags); + static inline __must_check bool try_get_page(struct page *page) { page = compound_head(page); @@ -1029,29 +1137,87 @@ static inline void put_page(struct page *page) __put_page(page); } -/** - * unpin_user_page() - release a gup-pinned page - * @page: pointer to page to be released +/* + * GUP_PIN_COUNTING_BIAS, and the associated functions that use it, overload + * the page's refcount so that two separate items are tracked: the original page + * reference count, and also a new count of how many pin_user_pages() calls were + * made against the page. ("gup-pinned" is another term for the latter). * - * Pages that were pinned via pin_user_pages*() must be released via either - * unpin_user_page(), or one of the unpin_user_pages*() routines. This is so - * that eventually such pages can be separately tracked and uniquely handled. In - * particular, interactions with RDMA and filesystems need special handling. + * With this scheme, pin_user_pages() becomes special: such pages are marked as + * distinct from normal pages. As such, the unpin_user_page() call (and its + * variants) must be used in order to release gup-pinned pages. * - * unpin_user_page() and put_page() are not interchangeable, despite this early - * implementation that makes them look the same. unpin_user_page() calls must - * be perfectly matched up with pin*() calls. + * Choice of value: + * + * By making GUP_PIN_COUNTING_BIAS a power of two, debugging of page reference + * counts with respect to pin_user_pages() and unpin_user_page() becomes + * simpler, due to the fact that adding an even power of two to the page + * refcount has the effect of using only the upper N bits, for the code that + * counts up using the bias value. This means that the lower bits are left for + * the exclusive use of the original code that increments and decrements by one + * (or at least, by much smaller values than the bias value). + * + * Of course, once the lower bits overflow into the upper bits (and this is + * OK, because subtraction recovers the original values), then visual inspection + * no longer suffices to directly view the separate counts. However, for normal + * applications that don't have huge page reference counts, this won't be an + * issue. + * + * Locking: the lockless algorithm described in page_cache_get_speculative() + * and page_cache_gup_pin_speculative() provides safe operation for + * get_user_pages and page_mkclean and other calls that race to set up page + * table entries. */ -static inline void unpin_user_page(struct page *page) -{ - put_page(page); -} +#define GUP_PIN_COUNTING_BIAS (1U << 10) +void unpin_user_page(struct page *page); void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages, bool make_dirty); - void unpin_user_pages(struct page **pages, unsigned long npages); +/** + * page_maybe_dma_pinned() - report if a page is pinned for DMA. + * + * This function checks if a page has been pinned via a call to + * pin_user_pages*(). + * + * For non-huge pages, the return value is partially fuzzy: false is not fuzzy, + * because it means "definitely not pinned for DMA", but true means "probably + * pinned for DMA, but possibly a false positive due to having at least + * GUP_PIN_COUNTING_BIAS worth of normal page references". + * + * False positives are OK, because: a) it's unlikely for a page to get that many + * refcounts, and b) all the callers of this routine are expected to be able to + * deal gracefully with a false positive. + * + * For huge pages, the result will be exactly correct. That's because we have + * more tracking data available: the 3rd struct page in the compound page is + * used to track the pincount (instead using of the GUP_PIN_COUNTING_BIAS + * scheme). + * + * For more information, please see Documentation/vm/pin_user_pages.rst. + * + * @page: pointer to page to be queried. + * @Return: True, if it is likely that the page has been "dma-pinned". + * False, if the page is definitely not dma-pinned. + */ +static inline bool page_maybe_dma_pinned(struct page *page) +{ + if (hpage_pincount_available(page)) + return compound_pincount(page) > 0; + + /* + * page_ref_count() is signed. If that refcount overflows, then + * page_ref_count() returns a negative value, and callers will avoid + * further incrementing the refcount. + * + * Here, for that overflow case, use the signed bit to count a little + * bit higher via unsigned math, and thus still get an accurate result. + */ + return ((unsigned int)page_ref_count(compound_head(page))) >= + GUP_PIN_COUNTING_BIAS; +} + #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define SECTION_IN_PAGE_FLAGS #endif @@ -2364,26 +2530,7 @@ struct vm_unmapped_area_info { unsigned long align_offset; }; -extern unsigned long unmapped_area(struct vm_unmapped_area_info *info); -extern unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info); - -/* - * Search for an unmapped address range. - * - * We are looking for a range that: - * - does not intersect with any VMA; - * - is contained within the [low_limit, high_limit) interval; - * - is at least the desired size. - * - satisfies (begin_addr & align_mask) == (align_offset & align_mask) - */ -static inline unsigned long -vm_unmapped_area(struct vm_unmapped_area_info *info) -{ - if (info->flags & VM_UNMAPPED_AREA_TOPDOWN) - return unmapped_area_topdown(info); - else - return unmapped_area(info); -} +extern unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info); /* truncate.c */ extern void truncate_inode_pages(struct address_space *, loff_t); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c28911c3afa8..dd555e6d23f3 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -137,7 +137,7 @@ struct page { }; struct { /* Second tail page of compound page */ unsigned long _compound_pad_1; /* compound_head */ - unsigned long _compound_pad_2; + atomic_t hpage_pinned_refcount; /* For both global and memcg */ struct list_head deferred_list; }; @@ -226,6 +226,11 @@ static inline atomic_t *compound_mapcount_ptr(struct page *page) return &page[1].compound_mapcount; } +static inline atomic_t *compound_pincount_ptr(struct page *page) +{ + return &page[2].hpage_pinned_refcount; +} + /* * Used for sizing the vmemmap region on some architectures */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 462f6873905a..e84d448988b6 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -243,6 +243,8 @@ enum node_stat_item { NR_DIRTIED, /* page dirtyings since bootup */ NR_WRITTEN, /* page writings since bootup */ NR_KERNEL_MISC_RECLAIMABLE, /* reclaimable non-slab kernel pages */ + NR_FOLL_PIN_ACQUIRED, /* via: pin_user_page(), gup flag: FOLL_PIN */ + NR_FOLL_PIN_RELEASED, /* pages returned via unpin_user_page() */ NR_VM_NODE_STAT_ITEMS }; @@ -1372,7 +1374,7 @@ static inline int pfn_valid(unsigned long pfn) } #endif -static inline int pfn_present(unsigned long pfn) +static inline int pfn_in_present_section(unsigned long pfn) { if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; @@ -1409,7 +1411,7 @@ void sparse_init(void); #else #define sparse_init() do {} while (0) #define sparse_index_init(_sec, _nid) do {} while (0) -#define pfn_present pfn_valid +#define pfn_in_present_section pfn_valid #define subsection_map_init(_pfn, _nr_pages) do {} while (0) #endif /* CONFIG_SPARSEMEM */ diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 14d14beb1f7f..d27701199a4d 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -102,6 +102,15 @@ static inline void page_ref_sub(struct page *page, int nr) __page_ref_mod(page, -nr); } +static inline int page_ref_sub_return(struct page *page, int nr) +{ + int ret = atomic_sub_return(nr, &page->_refcount); + + if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_return)) + __page_ref_mod_and_return(page, -nr, ret); + return ret; +} + static inline void page_ref_inc(struct page *page) { atomic_inc(&page->_refcount); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ccb14b6a16b5..f56282491a48 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -33,8 +33,8 @@ enum mapping_flags { /** * mapping_set_error - record a writeback error in the address_space - * @mapping - the mapping in which an error should be set - * @error - the error to set in the mapping + * @mapping: the mapping in which an error should be set + * @error: the error to set in the mapping * * When writeback fails in some way, we must record that error so that * userspace can be informed when fsync and the like are called. We endeavor @@ -70,11 +70,9 @@ static inline void mapping_clear_unevictable(struct address_space *mapping) clear_bit(AS_UNEVICTABLE, &mapping->flags); } -static inline int mapping_unevictable(struct address_space *mapping) +static inline bool mapping_unevictable(struct address_space *mapping) { - if (mapping) - return test_bit(AS_UNEVICTABLE, &mapping->flags); - return !!mapping; + return mapping && test_bit(AS_UNEVICTABLE, &mapping->flags); } static inline void mapping_set_exiting(struct address_space *mapping) @@ -305,9 +303,9 @@ static inline struct page *find_lock_page(struct address_space *mapping, * atomic allocation! */ static inline struct page *find_or_create_page(struct address_space *mapping, - pgoff_t offset, gfp_t gfp_mask) + pgoff_t index, gfp_t gfp_mask) { - return pagecache_get_page(mapping, offset, + return pagecache_get_page(mapping, index, FGP_LOCK|FGP_ACCESSED|FGP_CREAT, gfp_mask); } @@ -333,14 +331,19 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping, mapping_gfp_mask(mapping)); } -static inline struct page *find_subpage(struct page *page, pgoff_t offset) +/* + * Given the page we found in the page cache, return the page corresponding + * to this index in the file + */ +static inline struct page *find_subpage(struct page *head, pgoff_t index) { - if (PageHuge(page)) - return page; + /* HugeTLBfs wants the head page regardless */ + if (PageHuge(head)) + return head; - VM_BUG_ON_PAGE(PageTail(page), page); + VM_BUG_ON_PAGE(PageTail(head), head); - return page + (offset & (compound_nr(page) - 1)); + return head + (index & (compound_nr(head) - 1)); } struct page *find_get_entry(struct address_space *mapping, pgoff_t offset); diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index a29df79540ce..3e5b090c16d4 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -10,6 +10,8 @@ #include <linux/cred.h> #include <linux/refcount.h> #include <linux/posix-timers.h> +#include <linux/mm_types.h> +#include <asm/ptrace.h> /* * Types defining task->signal and task->sighand and APIs using them: @@ -377,6 +379,20 @@ static inline int signal_pending_state(long state, struct task_struct *p) } /* + * This should only be used in fault handlers to decide whether we + * should stop the current fault routine to handle the signals + * instead, especially with the case where we've got interrupted with + * a VM_FAULT_RETRY. + */ +static inline bool fault_signal_pending(vm_fault_t fault_flags, + struct pt_regs *regs) +{ + return unlikely((fault_flags & VM_FAULT_RETRY) && + (fatal_signal_pending(current) || + (user_mode(regs) && signal_pending(current)))); +} + +/* * Reevaluate whether the task has signals pending delivery. * Wake the task if so. * This is required every time the blocked sigset_t changes. diff --git a/include/linux/swap.h b/include/linux/swap.h index 1e99f7ac1d7e..b835d8dbea0e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -374,7 +374,6 @@ extern int sysctl_min_slab_ratio; #define node_reclaim_mode 0 #endif -extern int page_evictable(struct page *page); extern void check_move_unevictable_pages(struct pagevec *pvec); extern int kswapd_run(int nid); diff --git a/include/linux/topology.h b/include/linux/topology.h index eb2fe6edd73c..608fa4aadf0e 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -130,20 +130,11 @@ static inline int numa_node_id(void) * Use the accessor functions set_numa_mem(), numa_mem_id() and cpu_to_mem(). */ DECLARE_PER_CPU(int, _numa_mem_); -extern int _node_numa_mem_[MAX_NUMNODES]; #ifndef set_numa_mem static inline void set_numa_mem(int node) { this_cpu_write(_numa_mem_, node); - _node_numa_mem_[numa_node_id()] = node; -} -#endif - -#ifndef node_to_mem_node -static inline int node_to_mem_node(int node) -{ - return _node_numa_mem_[node]; } #endif @@ -166,7 +157,6 @@ static inline int cpu_to_mem(int cpu) static inline void set_cpu_numa_mem(int cpu, int node) { per_cpu(_numa_mem_, cpu) = node; - _node_numa_mem_[cpu_to_node(cpu)] = node; } #endif @@ -180,13 +170,6 @@ static inline int numa_mem_id(void) } #endif -#ifndef node_to_mem_node -static inline int node_to_mem_node(int node) -{ - return node; -} -#endif - #ifndef cpu_to_mem static inline int cpu_to_mem(int cpu) { diff --git a/include/trace/events/mmap.h b/include/trace/events/mmap.h new file mode 100644 index 000000000000..4661f7ba07c0 --- /dev/null +++ b/include/trace/events/mmap.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mmap + +#if !defined(_TRACE_MMAP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MMAP_H + +#include <linux/tracepoint.h> + +TRACE_EVENT(vm_unmapped_area, + + TP_PROTO(unsigned long addr, struct vm_unmapped_area_info *info), + + TP_ARGS(addr, info), + + TP_STRUCT__entry( + __field(unsigned long, addr) + __field(unsigned long, total_vm) + __field(unsigned long, flags) + __field(unsigned long, length) + __field(unsigned long, low_limit) + __field(unsigned long, high_limit) + __field(unsigned long, align_mask) + __field(unsigned long, align_offset) + ), + + TP_fast_assign( + __entry->addr = addr; + __entry->total_vm = current->mm->total_vm; + __entry->flags = info->flags; + __entry->length = info->length; + __entry->low_limit = info->low_limit; + __entry->high_limit = info->high_limit; + __entry->align_mask = info->align_mask; + __entry->align_offset = info->align_offset; + ), + + TP_printk("addr=0x%lx err=%ld total_vm=0x%lx flags=0x%lx len=0x%lx lo=0x%lx hi=0x%lx mask=0x%lx ofs=0x%lx\n", + IS_ERR_VALUE(__entry->addr) ? 0 : __entry->addr, + IS_ERR_VALUE(__entry->addr) ? __entry->addr : 0, + __entry->total_vm, __entry->flags, __entry->length, + __entry->low_limit, __entry->high_limit, __entry->align_mask, + __entry->align_offset) +); +#endif + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h index fc1a64c3447b..923cc162609c 100644 --- a/include/uapi/linux/mman.h +++ b/include/uapi/linux/mman.h @@ -5,8 +5,9 @@ #include <asm/mman.h> #include <asm-generic/hugetlb_encode.h> -#define MREMAP_MAYMOVE 1 -#define MREMAP_FIXED 2 +#define MREMAP_MAYMOVE 1 +#define MREMAP_FIXED 2 +#define MREMAP_DONTUNMAP 4 #define OVERCOMMIT_GUESS 0 #define OVERCOMMIT_ALWAYS 1 |