summaryrefslogtreecommitdiff
path: root/include/linux/mm.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/mm.h')
-rw-r--r--include/linux/mm.h110
1 files changed, 86 insertions, 24 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 21115933b9b8..c274f75efcf9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -106,7 +106,7 @@ extern int mmap_rnd_compat_bits __read_mostly;
* embedding these tags into addresses that point to these memory regions, and
* checking that the memory and the pointer tags match on memory accesses)
* redefine this macro to strip tags from pointers.
- * It's defined as noop for arcitectures that don't support memory tagging.
+ * It's defined as noop for architectures that don't support memory tagging.
*/
#ifndef untagged_addr
#define untagged_addr(addr) (addr)
@@ -372,6 +372,13 @@ extern unsigned int kobjsize(const void *objp);
# define VM_GROWSUP VM_NONE
#endif
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
+# define VM_UFFD_MINOR_BIT 37
+# define VM_UFFD_MINOR BIT(VM_UFFD_MINOR_BIT) /* UFFD minor faults */
+#else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
+# define VM_UFFD_MINOR VM_NONE
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
+
/* Bits set in the VMA until the stack is in its final location */
#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ)
@@ -432,8 +439,7 @@ extern unsigned int kobjsize(const void *objp);
extern pgprot_t protection_map[16];
/**
- * Fault flag definitions.
- *
+ * enum fault_flag - Fault flag definitions.
* @FAULT_FLAG_WRITE: Fault was a write fault.
* @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE.
* @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked.
@@ -464,16 +470,18 @@ extern pgprot_t protection_map[16];
* signals before a retry to make sure the continuous page faults can still be
* interrupted if necessary.
*/
-#define FAULT_FLAG_WRITE 0x01
-#define FAULT_FLAG_MKWRITE 0x02
-#define FAULT_FLAG_ALLOW_RETRY 0x04
-#define FAULT_FLAG_RETRY_NOWAIT 0x08
-#define FAULT_FLAG_KILLABLE 0x10
-#define FAULT_FLAG_TRIED 0x20
-#define FAULT_FLAG_USER 0x40
-#define FAULT_FLAG_REMOTE 0x80
-#define FAULT_FLAG_INSTRUCTION 0x100
-#define FAULT_FLAG_INTERRUPTIBLE 0x200
+enum fault_flag {
+ FAULT_FLAG_WRITE = 1 << 0,
+ FAULT_FLAG_MKWRITE = 1 << 1,
+ FAULT_FLAG_ALLOW_RETRY = 1 << 2,
+ FAULT_FLAG_RETRY_NOWAIT = 1 << 3,
+ FAULT_FLAG_KILLABLE = 1 << 4,
+ FAULT_FLAG_TRIED = 1 << 5,
+ FAULT_FLAG_USER = 1 << 6,
+ FAULT_FLAG_REMOTE = 1 << 7,
+ FAULT_FLAG_INSTRUCTION = 1 << 8,
+ FAULT_FLAG_INTERRUPTIBLE = 1 << 9,
+};
/*
* The default fault flags that should be used by most of the
@@ -485,6 +493,7 @@ extern pgprot_t protection_map[16];
/**
* fault_flag_allow_retry_first - check ALLOW_RETRY the first time
+ * @flags: Fault flags.
*
* This is mostly used for places where we want to try to avoid taking
* the mmap_lock for too long a time when waiting for another condition
@@ -495,7 +504,7 @@ extern pgprot_t protection_map[16];
* Return: true if the page fault allows retry and this is the first
* attempt of the fault handling; false otherwise.
*/
-static inline bool fault_flag_allow_retry_first(unsigned int flags)
+static inline bool fault_flag_allow_retry_first(enum fault_flag flags)
{
return (flags & FAULT_FLAG_ALLOW_RETRY) &&
(!(flags & FAULT_FLAG_TRIED));
@@ -530,7 +539,7 @@ struct vm_fault {
pgoff_t pgoff; /* Logical page offset based on vma */
unsigned long address; /* Faulting virtual address */
};
- unsigned int flags; /* FAULT_FLAG_xxx flags
+ enum fault_flag flags; /* FAULT_FLAG_xxx flags
* XXX: should really be 'const' */
pmd_t *pmd; /* Pointer to pmd entry matching
* the 'address' */
@@ -580,7 +589,7 @@ struct vm_operations_struct {
void (*close)(struct vm_area_struct * area);
/* Called any time before splitting to check if it's allowed */
int (*may_split)(struct vm_area_struct *area, unsigned long addr);
- int (*mremap)(struct vm_area_struct *area, unsigned long flags);
+ int (*mremap)(struct vm_area_struct *area);
/*
* Called by mprotect() to make driver-specific permission
* checks before mprotect() is finalised. The VMA must not
@@ -1132,6 +1141,11 @@ static inline bool is_zone_device_page(const struct page *page)
}
#endif
+static inline bool is_zone_movable_page(const struct page *page)
+{
+ return page_zonenum(page) == ZONE_MOVABLE;
+}
+
#ifdef CONFIG_DEV_PAGEMAP_OPS
void free_devmap_managed_page(struct page *page);
DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
@@ -1265,13 +1279,16 @@ static inline void put_page(struct page *page)
void unpin_user_page(struct page *page);
void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
bool make_dirty);
+void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages,
+ bool make_dirty);
void unpin_user_pages(struct page **pages, unsigned long npages);
/**
- * page_maybe_dma_pinned() - report if a page is pinned for DMA.
+ * page_maybe_dma_pinned - Report if a page is pinned for DMA.
+ * @page: The page.
*
* This function checks if a page has been pinned via a call to
- * pin_user_pages*().
+ * a function in the pin_user_pages() family.
*
* For non-huge pages, the return value is partially fuzzy: false is not fuzzy,
* because it means "definitely not pinned for DMA", but true means "probably
@@ -1289,9 +1306,8 @@ void unpin_user_pages(struct page **pages, unsigned long npages);
*
* For more information, please see Documentation/core-api/pin_user_pages.rst.
*
- * @page: pointer to page to be queried.
- * @Return: True, if it is likely that the page has been "dma-pinned".
- * False, if the page is definitely not dma-pinned.
+ * Return: True, if it is likely that the page has been "dma-pinned".
+ * False, if the page is definitely not dma-pinned.
*/
static inline bool page_maybe_dma_pinned(struct page *page)
{
@@ -1539,6 +1555,20 @@ static inline unsigned long page_to_section(const struct page *page)
}
#endif
+/* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */
+#ifdef CONFIG_MIGRATION
+static inline bool is_pinnable_page(struct page *page)
+{
+ return !(is_zone_movable_page(page) || is_migrate_cma_page(page)) ||
+ is_zero_pfn(page_to_pfn(page));
+}
+#else
+static inline bool is_pinnable_page(struct page *page)
+{
+ return true;
+}
+#endif
+
static inline void set_page_zone(struct page *page, enum zone_type zone)
{
page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT);
@@ -1629,7 +1659,6 @@ static inline pgoff_t page_index(struct page *page)
bool page_mapped(struct page *page);
struct address_space *page_mapping(struct page *page);
-struct address_space *page_mapping_file(struct page *page);
/*
* Return true only if the page has been allocated with
@@ -2357,7 +2386,7 @@ extern unsigned long free_reserved_area(void *start, void *end,
int poison, const char *s);
extern void adjust_managed_page_count(struct page *page, long count);
-extern void mem_init_print_info(const char *str);
+extern void mem_init_print_info(void);
extern void reserve_bootmem_region(phys_addr_t start, phys_addr_t end);
@@ -2731,6 +2760,8 @@ unsigned long change_prot_numa(struct vm_area_struct *vma,
struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
unsigned long pfn, unsigned long size, pgprot_t);
+int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn, unsigned long size, pgprot_t prot);
int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr,
struct page **pages, unsigned long *num);
@@ -2790,7 +2821,6 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
#define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO
* and return without waiting upon it */
#define FOLL_POPULATE 0x40 /* fault in page */
-#define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */
#define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */
#define FOLL_NUMA 0x200 /* force NUMA hinting page fault */
#define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */
@@ -3186,5 +3216,37 @@ void mem_dump_obj(void *object);
static inline void mem_dump_obj(void *object) {}
#endif
+/**
+ * seal_check_future_write - Check for F_SEAL_FUTURE_WRITE flag and handle it
+ * @seals: the seals to check
+ * @vma: the vma to operate on
+ *
+ * Check whether F_SEAL_FUTURE_WRITE is set; if so, do proper check/handling on
+ * the vma flags. Return 0 if check pass, or <0 for errors.
+ */
+static inline int seal_check_future_write(int seals, struct vm_area_struct *vma)
+{
+ if (seals & F_SEAL_FUTURE_WRITE) {
+ /*
+ * New PROT_WRITE and MAP_SHARED mmaps are not allowed when
+ * "future write" seal active.
+ */
+ if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
+ return -EPERM;
+
+ /*
+ * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as
+ * MAP_SHARED and read-only, take care to not allow mprotect to
+ * revert protections on such mappings. Do this only for shared
+ * mappings. For private mappings, don't need to mask
+ * VM_MAYWRITE as we still want them to be COW-writable.
+ */
+ if (vma->vm_flags & VM_SHARED)
+ vma->vm_flags &= ~(VM_MAYWRITE);
+ }
+
+ return 0;
+}
+
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */