summary | refs | log | tree | commit | diff
path: root/fs/proc
diff options
context:
space:
mode:
Diffstat (limited to 'fs/proc')
-rw-r--r-- fs/proc/task_mmu.c 285
1 file changed, 133 insertions, 152 deletions
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 3b4d8255e806..41f1a50c10c9 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -446,6 +446,7 @@ struct mem_size_stats {
unsigned long anonymous_thp;
unsigned long swap;
u64 pss;
+ u64 swap_pss;
};
static void smaps_account(struct mem_size_stats *mss, struct page *page,
@@ -492,9 +493,20 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
} else if (is_swap_pte(*pte)) {
swp_entry_t swpent = pte_to_swp_entry(*pte);
- if (!non_swap_entry(swpent))
+ if (!non_swap_entry(swpent)) {
+ int mapcount;
+
mss->swap += PAGE_SIZE;
- else if (is_migration_entry(swpent))
+ mapcount = swp_swapcount(swpent);
+ if (mapcount >= 2) {
+ u64 pss_delta = (u64)PAGE_SIZE << PSS_SHIFT;
+
+ do_div(pss_delta, mapcount);
+ mss->swap_pss += pss_delta;
+ } else {
+ mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
+ }
+ } else if (is_migration_entry(swpent))
page = migration_entry_to_page(swpent);
}
@@ -640,6 +652,7 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
"Anonymous: %8lu kB\n"
"AnonHugePages: %8lu kB\n"
"Swap: %8lu kB\n"
+ "SwapPss: %8lu kB\n"
"KernelPageSize: %8lu kB\n"
"MMUPageSize: %8lu kB\n"
"Locked: %8lu kB\n",
@@ -654,6 +667,7 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
mss.anonymous >> 10,
mss.anonymous_thp >> 10,
mss.swap >> 10,
+ (unsigned long)(mss.swap_pss >> (10 + PSS_SHIFT)),
vma_kernel_pagesize(vma) >> 10,
vma_mmu_pagesize(vma) >> 10,
(vma->vm_flags & VM_LOCKED) ?
@@ -712,23 +726,6 @@ const struct file_operations proc_tid_smaps_operations = {
.release = proc_map_release,
};
-/*
- * We do not want to have constant page-shift bits sitting in
- * pagemap entries and are about to reuse them some time soon.
- *
- * Here's the "migration strategy":
- * 1. when the system boots these bits remain what they are,
- * but a warning about future change is printed in log;
- * 2. once anyone clears soft-dirty bits via clear_refs file,
- * these flag is set to denote, that user is aware of the
- * new API and those page-shift bits change their meaning.
- * The respective warning is printed in dmesg;
- * 3. In a couple of releases we will remove all the mentions
- * of page-shift in pagemap entries.
- */
-
-static bool soft_dirty_cleared __read_mostly;
-
enum clear_refs_types {
CLEAR_REFS_ALL = 1,
CLEAR_REFS_ANON,
@@ -889,13 +886,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
return -EINVAL;
- if (type == CLEAR_REFS_SOFT_DIRTY) {
- soft_dirty_cleared = true;
- pr_warn_once("The pagemap bits 55-60 has changed their meaning!"
- " See the linux/Documentation/vm/pagemap.txt for "
- "details.\n");
- }
-
task = get_proc_task(file_inode(file));
if (!task)
return -ESRCH;
@@ -963,36 +953,26 @@ typedef struct {
struct pagemapread {
int pos, len; /* units: PM_ENTRY_BYTES, not bytes */
pagemap_entry_t *buffer;
- bool v2;
+ bool show_pfn;
};
#define PAGEMAP_WALK_SIZE (PMD_SIZE)
#define PAGEMAP_WALK_MASK (PMD_MASK)
-#define PM_ENTRY_BYTES sizeof(pagemap_entry_t)
-#define PM_STATUS_BITS 3
-#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
-#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
-#define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
-#define PM_PSHIFT_BITS 6
-#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
-#define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
-#define __PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
-#define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1)
-#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK)
-/* in "new" pagemap pshift bits are occupied with more status bits */
-#define PM_STATUS2(v2, x) (__PM_PSHIFT(v2 ? x : PAGE_SHIFT))
-
-#define __PM_SOFT_DIRTY (1LL)
-#define PM_PRESENT PM_STATUS(4LL)
-#define PM_SWAP PM_STATUS(2LL)
-#define PM_FILE PM_STATUS(1LL)
-#define PM_NOT_PRESENT(v2) PM_STATUS2(v2, 0)
+#define PM_ENTRY_BYTES sizeof(pagemap_entry_t)
+#define PM_PFRAME_BITS 55
+#define PM_PFRAME_MASK GENMASK_ULL(PM_PFRAME_BITS - 1, 0)
+#define PM_SOFT_DIRTY BIT_ULL(55)
+#define PM_MMAP_EXCLUSIVE BIT_ULL(56)
+#define PM_FILE BIT_ULL(61)
+#define PM_SWAP BIT_ULL(62)
+#define PM_PRESENT BIT_ULL(63)
+
#define PM_END_OF_BUFFER 1
-static inline pagemap_entry_t make_pme(u64 val)
+static inline pagemap_entry_t make_pme(u64 frame, u64 flags)
{
- return (pagemap_entry_t) { .pme = val };
+ return (pagemap_entry_t) { .pme = (frame & PM_PFRAME_MASK) | flags };
}
static int add_to_pagemap(unsigned long addr, pagemap_entry_t *pme,
@@ -1013,7 +993,7 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
while (addr < end) {
struct vm_area_struct *vma = find_vma(walk->mm, addr);
- pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
+ pagemap_entry_t pme = make_pme(0, 0);
/* End of address space hole, which we mark as non-present. */
unsigned long hole_end;
@@ -1033,7 +1013,7 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
/* Addresses in the VMA. */
if (vma->vm_flags & VM_SOFTDIRTY)
- pme.pme |= PM_STATUS2(pm->v2, __PM_SOFT_DIRTY);
+ pme = make_pme(0, PM_SOFT_DIRTY);
for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) {
err = add_to_pagemap(addr, &pme, pm);
if (err)
@@ -1044,67 +1024,42 @@ out:
return err;
}
-static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
+static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
struct vm_area_struct *vma, unsigned long addr, pte_t pte)
{
- u64 frame, flags;
+ u64 frame = 0, flags = 0;
struct page *page = NULL;
- int flags2 = 0;
if (pte_present(pte)) {
- frame = pte_pfn(pte);
- flags = PM_PRESENT;
+ if (pm->show_pfn)
+ frame = pte_pfn(pte);
+ flags |= PM_PRESENT;
page = vm_normal_page(vma, addr, pte);
if (pte_soft_dirty(pte))
- flags2 |= __PM_SOFT_DIRTY;
+ flags |= PM_SOFT_DIRTY;
} else if (is_swap_pte(pte)) {
swp_entry_t entry;
if (pte_swp_soft_dirty(pte))
- flags2 |= __PM_SOFT_DIRTY;
+ flags |= PM_SOFT_DIRTY;
entry = pte_to_swp_entry(pte);
frame = swp_type(entry) |
(swp_offset(entry) << MAX_SWAPFILES_SHIFT);
- flags = PM_SWAP;
+ flags |= PM_SWAP;
if (is_migration_entry(entry))
page = migration_entry_to_page(entry);
- } else {
- if (vma->vm_flags & VM_SOFTDIRTY)
- flags2 |= __PM_SOFT_DIRTY;
- *pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2));
- return;
}
if (page && !PageAnon(page))
flags |= PM_FILE;
- if ((vma->vm_flags & VM_SOFTDIRTY))
- flags2 |= __PM_SOFT_DIRTY;
-
- *pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags);
-}
+ if (page && page_mapcount(page) == 1)
+ flags |= PM_MMAP_EXCLUSIVE;
+ if (vma->vm_flags & VM_SOFTDIRTY)
+ flags |= PM_SOFT_DIRTY;
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
- pmd_t pmd, int offset, int pmd_flags2)
-{
- /*
- * Currently pmd for thp is always present because thp can not be
- * swapped-out, migrated, or HWPOISONed (split in such cases instead.)
- * This if-check is just to prepare for future implementation.
- */
- if (pmd_present(pmd))
- *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset)
- | PM_STATUS2(pm->v2, pmd_flags2) | PM_PRESENT);
- else
- *pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, pmd_flags2));
+ return make_pme(frame, flags);
}
-#else
-static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
- pmd_t pmd, int offset, int pmd_flags2)
-{
-}
-#endif
-static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
struct vm_area_struct *vma = walk->vma;
@@ -1113,41 +1068,58 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
pte_t *pte, *orig_pte;
int err = 0;
- if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
- int pmd_flags2;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ if (pmd_trans_huge_lock(pmdp, vma, &ptl) == 1) {
+ u64 flags = 0, frame = 0;
+ pmd_t pmd = *pmdp;
- if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd))
- pmd_flags2 = __PM_SOFT_DIRTY;
- else
- pmd_flags2 = 0;
+ if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(pmd))
+ flags |= PM_SOFT_DIRTY;
+
+ /*
+ * Currently pmd for thp is always present because thp
+ * can not be swapped-out, migrated, or HWPOISONed
+ * (split in such cases instead.)
+ * This if-check is just to prepare for future implementation.
+ */
+ if (pmd_present(pmd)) {
+ struct page *page = pmd_page(pmd);
+
+ if (page_mapcount(page) == 1)
+ flags |= PM_MMAP_EXCLUSIVE;
+
+ flags |= PM_PRESENT;
+ if (pm->show_pfn)
+ frame = pmd_pfn(pmd) +
+ ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+ }
for (; addr != end; addr += PAGE_SIZE) {
- unsigned long offset;
- pagemap_entry_t pme;
+ pagemap_entry_t pme = make_pme(frame, flags);
- offset = (addr & ~PAGEMAP_WALK_MASK) >>
- PAGE_SHIFT;
- thp_pmd_to_pagemap_entry(&pme, pm, *pmd, offset, pmd_flags2);
err = add_to_pagemap(addr, &pme, pm);
if (err)
break;
+ if (pm->show_pfn && (flags & PM_PRESENT))
+ frame++;
}
spin_unlock(ptl);
return err;
}
- if (pmd_trans_unstable(pmd))
+ if (pmd_trans_unstable(pmdp))
return 0;
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
* We can assume that @vma always points to a valid one and @end never
* goes beyond vma->vm_end.
*/
- orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
for (; addr < end; pte++, addr += PAGE_SIZE) {
pagemap_entry_t pme;
- pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
+ pme = pte_to_pagemap_entry(pm, vma, addr, *pte);
err = add_to_pagemap(addr, &pme, pm);
if (err)
break;
@@ -1160,40 +1132,44 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
}
#ifdef CONFIG_HUGETLB_PAGE
-static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
- pte_t pte, int offset, int flags2)
-{
- if (pte_present(pte))
- *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) |
- PM_STATUS2(pm->v2, flags2) |
- PM_PRESENT);
- else
- *pme = make_pme(PM_NOT_PRESENT(pm->v2) |
- PM_STATUS2(pm->v2, flags2));
-}
-
/* This function walks within one hugetlb entry in the single call */
-static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
+static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
struct pagemapread *pm = walk->private;
struct vm_area_struct *vma = walk->vma;
+ u64 flags = 0, frame = 0;
int err = 0;
- int flags2;
- pagemap_entry_t pme;
+ pte_t pte;
if (vma->vm_flags & VM_SOFTDIRTY)
- flags2 = __PM_SOFT_DIRTY;
- else
- flags2 = 0;
+ flags |= PM_SOFT_DIRTY;
+
+ pte = huge_ptep_get(ptep);
+ if (pte_present(pte)) {
+ struct page *page = pte_page(pte);
+
+ if (!PageAnon(page))
+ flags |= PM_FILE;
+
+ if (page_mapcount(page) == 1)
+ flags |= PM_MMAP_EXCLUSIVE;
+
+ flags |= PM_PRESENT;
+ if (pm->show_pfn)
+ frame = pte_pfn(pte) +
+ ((addr & ~hmask) >> PAGE_SHIFT);
+ }
for (; addr != end; addr += PAGE_SIZE) {
- int offset = (addr & ~hmask) >> PAGE_SHIFT;
- huge_pte_to_pagemap_entry(&pme, pm, *pte, offset, flags2);
+ pagemap_entry_t pme = make_pme(frame, flags);
+
err = add_to_pagemap(addr, &pme, pm);
if (err)
return err;
+ if (pm->show_pfn && (flags & PM_PRESENT))
+ frame++;
}
cond_resched();
@@ -1211,7 +1187,9 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
* Bits 0-54 page frame number (PFN) if present
* Bits 0-4 swap type if swapped
* Bits 5-54 swap offset if swapped
- * Bits 55-60 page shift (page size = 1<<page shift)
+ * Bit 55 pte is soft-dirty (see Documentation/vm/soft-dirty.txt)
+ * Bit 56 page exclusively mapped
+ * Bits 57-60 zero
* Bit 61 page is file-page or shared-anon
* Bit 62 page swapped
* Bit 63 page present
@@ -1229,42 +1207,37 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
static ssize_t pagemap_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
- struct task_struct *task = get_proc_task(file_inode(file));
- struct mm_struct *mm;
+ struct mm_struct *mm = file->private_data;
struct pagemapread pm;
- int ret = -ESRCH;
struct mm_walk pagemap_walk = {};
unsigned long src;
unsigned long svpfn;
unsigned long start_vaddr;
unsigned long end_vaddr;
- int copied = 0;
+ int ret = 0, copied = 0;
- if (!task)
+ if (!mm || !atomic_inc_not_zero(&mm->mm_users))
goto out;
ret = -EINVAL;
/* file position must be aligned */
if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES))
- goto out_task;
+ goto out_mm;
ret = 0;
if (!count)
- goto out_task;
+ goto out_mm;
+
+ /* do not disclose physical addresses: attack vector */
+ pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN);
- pm.v2 = soft_dirty_cleared;
pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_TEMPORARY);
ret = -ENOMEM;
if (!pm.buffer)
- goto out_task;
-
- mm = mm_access(task, PTRACE_MODE_READ);
- ret = PTR_ERR(mm);
- if (!mm || IS_ERR(mm))
- goto out_free;
+ goto out_mm;
- pagemap_walk.pmd_entry = pagemap_pte_range;
+ pagemap_walk.pmd_entry = pagemap_pmd_range;
pagemap_walk.pte_hole = pagemap_pte_hole;
#ifdef CONFIG_HUGETLB_PAGE
pagemap_walk.hugetlb_entry = pagemap_hugetlb_range;
@@ -1275,10 +1248,10 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
src = *ppos;
svpfn = src / PM_ENTRY_BYTES;
start_vaddr = svpfn << PAGE_SHIFT;
- end_vaddr = TASK_SIZE_OF(task);
+ end_vaddr = mm->task_size;
/* watch out for wraparound */
- if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT)
+ if (svpfn > mm->task_size >> PAGE_SHIFT)
start_vaddr = end_vaddr;
/*
@@ -1305,7 +1278,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
len = min(count, PM_ENTRY_BYTES * pm.pos);
if (copy_to_user(buf, pm.buffer, len)) {
ret = -EFAULT;
- goto out_mm;
+ goto out_free;
}
copied += len;
buf += len;
@@ -1315,24 +1288,31 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
if (!ret || ret == PM_END_OF_BUFFER)
ret = copied;
-out_mm:
- mmput(mm);
out_free:
kfree(pm.buffer);
-out_task:
- put_task_struct(task);
+out_mm:
+ mmput(mm);
out:
return ret;
}
static int pagemap_open(struct inode *inode, struct file *file)
{
- /* do not disclose physical addresses: attack vector */
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about "
- "to stop being page-shift some time soon. See the "
- "linux/Documentation/vm/pagemap.txt for details.\n");
+ struct mm_struct *mm;
+
+ mm = proc_mem_open(inode, PTRACE_MODE_READ);
+ if (IS_ERR(mm))
+ return PTR_ERR(mm);
+ file->private_data = mm;
+ return 0;
+}
+
+static int pagemap_release(struct inode *inode, struct file *file)
+{
+ struct mm_struct *mm = file->private_data;
+
+ if (mm)
+ mmdrop(mm);
return 0;
}
@@ -1340,6 +1320,7 @@ const struct file_operations proc_pagemap_operations = {
.llseek = mem_lseek, /* borrow this */
.read = pagemap_read,
.open = pagemap_open,
+ .release = pagemap_release,
};
#endif /* CONFIG_PROC_PAGE_MONITOR */