diff options
Diffstat (limited to 'include/linux/mm.h')
-rw-r--r-- | include/linux/mm.h | 302 |
1 files changed, 251 insertions, 51 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h index d006e93d5c93..721f451c3029 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -12,19 +12,21 @@ #include <linux/prio_tree.h> #include <linux/debug_locks.h> #include <linux/mm_types.h> +#include <linux/range.h> +#include <linux/pfn.h> struct mempolicy; struct anon_vma; struct file_ra_state; struct user_struct; struct writeback_control; -struct rlimit; #ifndef CONFIG_DISCONTIGMEM /* Don't use mapnrs, do it properly */ extern unsigned long max_mapnr; #endif extern unsigned long num_physpages; +extern unsigned long totalram_pages; extern void * high_memory; extern int page_cluster; @@ -34,8 +36,6 @@ extern int sysctl_legacy_va_layout; #define sysctl_legacy_va_layout 0 #endif -extern unsigned long mmap_min_addr; - #include <asm/page.h> #include <asm/pgtable.h> #include <asm/processor.h> @@ -78,7 +78,11 @@ extern unsigned int kobjsize(const void *objp); #define VM_MAYSHARE 0x00000080 #define VM_GROWSDOWN 0x00000100 /* general info on the segment */ +#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64) #define VM_GROWSUP 0x00000200 +#else +#define VM_GROWSUP 0x00000000 +#endif #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ #define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */ @@ -105,6 +109,10 @@ extern unsigned int kobjsize(const void *objp); #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ #define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */ #define VM_PFN_AT_MMAP 0x40000000 /* PFNMAP vma that is fully mapped at mmap time */ +#define VM_MERGEABLE 0x80000000 /* KSM may merge identical pages */ + +/* Bits set in the VMA until the stack is in its final location */ +#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ) #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS @@ -136,6 +144,7 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ #define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ #define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */ +#define FAULT_FLAG_ALLOW_RETRY 0x08 /* Retry fault if blocking */ /* * This interface is used by x86 PAT code to identify a pfn mapping that is @@ -265,6 +274,8 @@ static inline int get_page_unless_zero(struct page *page) return atomic_inc_not_zero(&page->_count); } +extern int page_is_ram(unsigned long pfn); + /* Support for virtually mapped pages */ struct page *vmalloc_to_page(const void *addr); unsigned long vmalloc_to_pfn(const void *addr); @@ -285,6 +296,14 @@ static inline int is_vmalloc_addr(const void *x) return 0; #endif } +#ifdef CONFIG_MMU +extern int is_vmalloc_or_module_addr(const void *x); +#else +static inline int is_vmalloc_or_module_addr(const void *x) +{ + return 0; +} +#endif static inline struct page *compound_head(struct page *page) { @@ -324,6 +343,7 @@ void put_page(struct page *page); void put_pages_list(struct list_head *pages); void split_page(struct page *page, unsigned int order); +int split_free_page(struct page *page); /* * Compound pages have a destructor function. Provide a @@ -478,8 +498,8 @@ static inline void set_compound_order(struct page *page, unsigned long order) #define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0)) #define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0)) -/* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allcator */ -#ifdef NODE_NOT_IN_PAGEFLAGS +/* NODE:ZONE or SECTION:ZONE is used to ID a zone for the buddy allocator */ +#ifdef NODE_NOT_IN_PAGE_FLAGS #define ZONEID_SHIFT (SECTIONS_SHIFT + ZONES_SHIFT) #define ZONEID_PGOFF ((SECTIONS_PGOFF < ZONES_PGOFF)? \ SECTIONS_PGOFF : ZONES_PGOFF) @@ -575,26 +595,13 @@ static inline void set_page_links(struct page *page, enum zone_type zone, } /* - * If a hint addr is less than mmap_min_addr change hint to be as - * low as possible but still greater than mmap_min_addr - */ -static inline unsigned long round_hint_to_min(unsigned long hint) -{ - hint &= PAGE_MASK; - if (((void *)hint != NULL) && - (hint < mmap_min_addr)) - return PAGE_ALIGN(mmap_min_addr); - return hint; -} - -/* * Some inline functions in vmstat.h depend on page_zone() */ #include <linux/vmstat.h> static __always_inline void *lowmem_page_address(struct page *page) { - return __va(page_to_pfn(page) << PAGE_SHIFT); + return __va(PFN_PHYS(page_to_pfn(page))); } #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) @@ -625,13 +632,22 @@ void page_address_init(void); /* * On an anonymous page mapped into a user virtual memory area, * page->mapping points to its anon_vma, not to a struct address_space; - * with the PAGE_MAPPING_ANON bit set to distinguish it. + * with the PAGE_MAPPING_ANON bit set to distinguish it. See rmap.h. + * + * On an anonymous page in a VM_MERGEABLE area, if CONFIG_KSM is enabled, + * the PAGE_MAPPING_KSM bit may be set along with the PAGE_MAPPING_ANON bit; + * and then page->mapping points, not to an anon_vma, but to a private + * structure which KSM associates with that merged page. See ksm.h. + * + * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is currently never used. * * Please note that, confusingly, "page_mapping" refers to the inode * address_space which maps the page from disk; whereas "page_mapped" * refers to user virtual address space into which the page is mapped. */ #define PAGE_MAPPING_ANON 1 +#define PAGE_MAPPING_KSM 2 +#define PAGE_MAPPING_FLAGS (PAGE_MAPPING_ANON | PAGE_MAPPING_KSM) extern struct address_space swapper_space; static inline struct address_space *page_mapping(struct page *page) @@ -639,16 +655,19 @@ static inline struct address_space *page_mapping(struct page *page) struct address_space *mapping = page->mapping; VM_BUG_ON(PageSlab(page)); -#ifdef CONFIG_SWAP if (unlikely(PageSwapCache(page))) mapping = &swapper_space; - else -#endif - if (unlikely((unsigned long)mapping & PAGE_MAPPING_ANON)) + else if (unlikely((unsigned long)mapping & PAGE_MAPPING_ANON)) mapping = NULL; return mapping; } +/* Neutral page->mapping pointer to address_space or anon_vma or other */ +static inline void *page_rmapping(struct page *page) +{ + return (void *)((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS); +} + static inline int PageAnon(struct page *page) { return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0; @@ -700,11 +719,21 @@ static inline int page_mapped(struct page *page) #define VM_FAULT_SIGBUS 0x0002 #define VM_FAULT_MAJOR 0x0004 #define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */ +#define VM_FAULT_HWPOISON 0x0010 /* Hit poisoned small page */ +#define VM_FAULT_HWPOISON_LARGE 0x0020 /* Hit poisoned large page. Index encoded in upper bits */ #define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ #define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ +#define VM_FAULT_RETRY 0x0400 /* ->fault blocked, must retry */ -#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS) +#define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */ + +#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | \ + VM_FAULT_HWPOISON_LARGE) + +/* Encode hstate index for a hwpoisoned large page */ +#define VM_FAULT_SET_HINDEX(x) ((x) << 12) +#define VM_FAULT_GET_HINDEX(x) (((x) >> 12) & 0xf) /* * Can be called by the pagefault handler when it gets a VM_FAULT_OOM. @@ -715,17 +744,8 @@ extern void pagefault_out_of_memory(void); extern void show_free_areas(void); -#ifdef CONFIG_SHMEM -extern int shmem_lock(struct file *file, int lock, struct user_struct *user); -#else -static inline int shmem_lock(struct file *file, int lock, - struct user_struct *user) -{ - return 0; -} -#endif +int shmem_lock(struct file *file, int lock, struct user_struct *user); struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); - int shmem_zero_setup(struct vm_area_struct *); #ifndef CONFIG_MMU @@ -771,6 +791,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlb, * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry * @pte_entry: if set, called for each non-empty PTE (4th-level) entry * @pte_hole: if set, called for each hole at all levels + * @hugetlb_entry: if set, called for each hugetlb entry * * (see walk_page_range for more details) */ @@ -780,6 +801,8 @@ struct mm_walk { int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, struct mm_walk *); int (*pte_entry)(pte_t *, unsigned long, unsigned long, struct mm_walk *); int (*pte_hole)(unsigned long, unsigned long, struct mm_walk *); + int (*hugetlb_entry)(pte_t *, unsigned long, + unsigned long, unsigned long, struct mm_walk *); struct mm_struct *mm; void *private; }; @@ -805,8 +828,15 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, unmap_mapping_range(mapping, holebegin, holelen, 0); } -extern int vmtruncate(struct inode * inode, loff_t offset); -extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); +extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new); +extern void truncate_setsize(struct inode *inode, loff_t newsize); +extern int vmtruncate(struct inode *inode, loff_t offset); +extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end); + +int truncate_inode_page(struct address_space *mapping, struct page *page); +int generic_error_remove_page(struct address_space *mapping, struct page *page); + +int invalidate_inode_page(struct page *page); #ifdef CONFIG_MMU extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, @@ -826,10 +856,11 @@ extern int make_pages_present(unsigned long addr, unsigned long end); extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, int len, int write, int force, + unsigned long start, int nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas); int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); +struct page *get_dump_page(unsigned long addr); extern int try_to_release_page(struct page * page, gfp_t gfp_mask); extern void do_invalidatepage(struct page *page, unsigned long offset); @@ -839,10 +870,17 @@ int __set_page_dirty_no_writeback(struct page *page); int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page); void account_page_dirtied(struct page *page, struct address_space *mapping); +void account_page_writeback(struct page *page); int set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); int clear_page_dirty_for_io(struct page *page); +/* Is the vma a continuation of the stack vma above it? */ +static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr) +{ + return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN); +} + extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len); @@ -858,6 +896,114 @@ extern int mprotect_fixup(struct vm_area_struct *vma, */ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); +/* + * per-process(per-mm_struct) statistics. + */ +#if defined(SPLIT_RSS_COUNTING) +/* + * The mm counters are not protected by its page_table_lock, + * so must be incremented atomically. + */ +static inline void set_mm_counter(struct mm_struct *mm, int member, long value) +{ + atomic_long_set(&mm->rss_stat.count[member], value); +} + +unsigned long get_mm_counter(struct mm_struct *mm, int member); + +static inline void add_mm_counter(struct mm_struct *mm, int member, long value) +{ + atomic_long_add(value, &mm->rss_stat.count[member]); +} + +static inline void inc_mm_counter(struct mm_struct *mm, int member) +{ + atomic_long_inc(&mm->rss_stat.count[member]); +} + +static inline void dec_mm_counter(struct mm_struct *mm, int member) +{ + atomic_long_dec(&mm->rss_stat.count[member]); +} + +#else /* !USE_SPLIT_PTLOCKS */ +/* + * The mm counters are protected by its page_table_lock, + * so can be incremented directly. + */ +static inline void set_mm_counter(struct mm_struct *mm, int member, long value) +{ + mm->rss_stat.count[member] = value; +} + +static inline unsigned long get_mm_counter(struct mm_struct *mm, int member) +{ + return mm->rss_stat.count[member]; +} + +static inline void add_mm_counter(struct mm_struct *mm, int member, long value) +{ + mm->rss_stat.count[member] += value; +} + +static inline void inc_mm_counter(struct mm_struct *mm, int member) +{ + mm->rss_stat.count[member]++; +} + +static inline void dec_mm_counter(struct mm_struct *mm, int member) +{ + mm->rss_stat.count[member]--; +} + +#endif /* !USE_SPLIT_PTLOCKS */ + +static inline unsigned long get_mm_rss(struct mm_struct *mm) +{ + return get_mm_counter(mm, MM_FILEPAGES) + + get_mm_counter(mm, MM_ANONPAGES); +} + +static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm) +{ + return max(mm->hiwater_rss, get_mm_rss(mm)); +} + +static inline unsigned long get_mm_hiwater_vm(struct mm_struct *mm) +{ + return max(mm->hiwater_vm, mm->total_vm); +} + +static inline void update_hiwater_rss(struct mm_struct *mm) +{ + unsigned long _rss = get_mm_rss(mm); + + if ((mm)->hiwater_rss < _rss) + (mm)->hiwater_rss = _rss; +} + +static inline void update_hiwater_vm(struct mm_struct *mm) +{ + if (mm->hiwater_vm < mm->total_vm) + mm->hiwater_vm = mm->total_vm; +} + +static inline void setmax_mm_hiwater_rss(unsigned long *maxrss, + struct mm_struct *mm) +{ + unsigned long hiwater_rss = get_mm_hiwater_rss(mm); + + if (*maxrss < hiwater_rss) + *maxrss = hiwater_rss; +} + +#if defined(SPLIT_RSS_COUNTING) +void sync_mm_rss(struct task_struct *task, struct mm_struct *mm); +#else +static inline void sync_mm_rss(struct task_struct *task, struct mm_struct *mm) +{ +} +#endif /* * A callback you can register to apply pressure to ageable caches. @@ -875,7 +1021,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, * querying the cache size, so a fastpath for that case is appropriate. */ struct shrinker { - int (*shrink)(int nr_to_scan, gfp_t gfp_mask); + int (*shrink)(struct shrinker *, int nr_to_scan, gfp_t gfp_mask); int seeks; /* seeks to recreate an obj */ /* These are for internal use */ @@ -888,7 +1034,15 @@ extern void unregister_shrinker(struct shrinker *); int vma_wants_writenotify(struct vm_area_struct *vma); -extern pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl); +extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, + spinlock_t **ptl); +static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, + spinlock_t **ptl) +{ + pte_t *ptep; + __cond_lock(*ptl, ptep = __get_locked_pte(mm, addr, ptl)); + return ptep; +} #ifdef __PAGETABLE_PUD_FOLDED static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, @@ -1028,6 +1182,9 @@ extern void add_active_range(unsigned int nid, unsigned long start_pfn, extern void remove_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); extern void remove_all_active_ranges(void); +void sort_node_map(void); +unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn, + unsigned long end_pfn); extern unsigned long absent_pages_in_range(unsigned long start_pfn, unsigned long end_pfn); extern void get_pfn_range_for_nid(unsigned int nid, @@ -1035,6 +1192,12 @@ extern void get_pfn_range_for_nid(unsigned int nid, extern unsigned long find_min_pfn_with_active_regions(void); extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); +int add_from_early_node_map(struct range *range, int az, + int nr_range, int nid); +u64 __init find_memory_core_early(int nid, u64 size, u64 align, + u64 goal, u64 limit); +void *__alloc_memory_core_early(int nodeid, u64 size, u64 align, + u64 goal, u64 limit); typedef int (*work_fn_t)(unsigned long, unsigned long, void *); extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); extern void sparse_memory_present_with_active_regions(int nid); @@ -1067,14 +1230,13 @@ extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); extern int after_bootmem; -#ifdef CONFIG_NUMA extern void setup_per_cpu_pageset(void); -#else -static inline void setup_per_cpu_pageset(void) {} -#endif + +extern void zone_pcp_update(struct zone *zone); /* nommu.c */ extern atomic_long_t mmap_pages_allocated; +extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t); /* prio_tree.c */ void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old); @@ -1096,7 +1258,7 @@ static inline void vma_nonlinear_insert(struct vm_area_struct *vma, /* mmap.c */ extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); -extern void vma_adjust(struct vm_area_struct *vma, unsigned long start, +extern int vma_adjust(struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert); extern struct vm_area_struct *vma_merge(struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, unsigned long end, @@ -1199,8 +1361,10 @@ unsigned long ra_submit(struct file_ra_state *ra, /* Do stack extension */ extern int expand_stack(struct vm_area_struct *vma, unsigned long address); -#ifdef CONFIG_IA64 +#if VM_GROWSUP extern int expand_upwards(struct vm_area_struct *vma, unsigned long address); +#else + #define expand_upwards(vma, address) do { } while (0) #endif extern int expand_stack_downwards(struct vm_area_struct *vma, unsigned long address); @@ -1226,7 +1390,15 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma) return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; } +#ifdef CONFIG_MMU pgprot_t vm_get_page_prot(unsigned long vm_flags); +#else +static inline pgprot_t vm_get_page_prot(unsigned long vm_flags) +{ + return __pgprot(0); +} +#endif + struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr); int remap_pfn_range(struct vm_area_struct *, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t); @@ -1241,7 +1413,8 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address, #define FOLL_WRITE 0x01 /* check pte is writable */ #define FOLL_TOUCH 0x02 /* mark page accessed */ #define FOLL_GET 0x04 /* do get_page on page */ -#define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */ +#define FOLL_DUMP 0x08 /* give error on hole if it would be zero */ +#define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); @@ -1289,7 +1462,7 @@ int in_gate_area_no_task(unsigned long addr); #define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);}) #endif /* __HAVE_ARCH_GATE_AREA */ -int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *, +int drop_caches_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages); @@ -1303,20 +1476,47 @@ extern int randomize_va_space; const char * arch_vma_name(struct vm_area_struct *vma); void print_vma_addr(char *prefix, unsigned long rip); +void sparse_mem_maps_populate_node(struct page **map_map, + unsigned long pnum_begin, + unsigned long pnum_end, + unsigned long map_count, + int nodeid); + struct page *sparse_mem_map_populate(unsigned long pnum, int nid); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node); pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node); void *vmemmap_alloc_block(unsigned long size, int node); +void *vmemmap_alloc_block_buf(unsigned long size, int node); void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); int vmemmap_populate_basepages(struct page *start_page, unsigned long pages, int node); int vmemmap_populate(struct page *start_page, unsigned long pages, int node); void vmemmap_populate_print_last(void); -extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim, - size_t size); -extern void refund_locked_memory(struct mm_struct *mm, size_t size); + +enum mf_flags { + MF_COUNT_INCREASED = 1 << 0, +}; +extern void memory_failure(unsigned long pfn, int trapno); +extern int __memory_failure(unsigned long pfn, int trapno, int flags); +extern int unpoison_memory(unsigned long pfn); +extern int sysctl_memory_failure_early_kill; +extern int sysctl_memory_failure_recovery; +extern void shake_page(struct page *p, int access); +extern atomic_long_t mce_bad_pages; +extern int soft_offline_page(struct page *page, int flags); +#ifdef CONFIG_MEMORY_FAILURE +int is_hwpoison_address(unsigned long addr); +#else +static inline int is_hwpoison_address(unsigned long addr) +{ + return 0; +} +#endif + +extern void dump_page(struct page *page); + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ |