From 895dd92c032e1604aa0d7afaef7716e058343b67 Mon Sep 17 00:00:00 2001 From: Andrew Vagin Date: Thu, 12 Jul 2012 14:14:29 +0400 Subject: sched: Deliver sched_switch events to the current task Otherwise they can't be filtered for a defined task: perf record -e sched:sched_switch ./foo This command doesn't report any events without this patch. I think it isn't a security concern if someone knows who will be executed next - this can already be observed by polling /proc state. By default perf is disabled for non-root users in any case. I need these events for profiling sleep times. sched_switch is used for getting callchains and sched_stat_* is used for getting time periods. These events are combined in user space, then it can be analyzed by perf tools. Signed-off-by: Andrew Vagin Signed-off-by: Peter Zijlstra Cc: Steven Rostedt Cc: Arun Sharma Link: http://lkml.kernel.org/r/1342088069-1005148-1-git-send-email-avagin@openvz.org Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 468bdd44c1ba..ad732b56ba70 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1910,12 +1910,12 @@ static inline void prepare_task_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { + trace_sched_switch(prev, next); sched_info_switch(prev, next); perf_event_task_sched_out(prev, next); fire_sched_out_preempt_notifiers(prev, next); prepare_lock_switch(rq, next); prepare_arch_switch(next); - trace_sched_switch(prev, next); } /** -- cgit v1.2.3 From f403072c6108e15f319a4a5036b650c77760522c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 29 Jul 2012 20:22:12 +0200 Subject: uprobes: Don't recheck vma/f_mapping in write_opcode() write_opcode() rechecks valid_vma() and ->f_mapping, this is pointless. The caller, register_for_each_vma() or uprobe_mmap(), has already done these checks under mmap_sem. To clarify, uprobe_mmap() checks valid_vma() only, but we can rely on build_probe_list(vm_file->f_mapping->host). Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Cc: Anton Arapov Cc: Srikar Dronamraju Link: http://lkml.kernel.org/r/20120729182212.GA20304@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) (limited to 'kernel') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index f93532748bca..a2b32a51d0a2 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -206,33 +206,16 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t opcode) { struct page *old_page, *new_page; - struct address_space *mapping; void *vaddr_old, *vaddr_new; struct vm_area_struct *vma; - struct uprobe *uprobe; int ret; + retry: /* Read the page with vaddr into memory */ ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma); if (ret <= 0) return ret; - ret = -EINVAL; - - /* - * We are interested in text pages only. Our pages of interest - * should be mapped for read and execute only. We desist from - * adding probes in write mapped pages since the breakpoints - * might end up in the file copy. - */ - if (!valid_vma(vma, is_swbp_insn(&opcode))) - goto put_out; - - uprobe = container_of(auprobe, struct uprobe, arch); - mapping = uprobe->inode->i_mapping; - if (mapping != vma->vm_file->f_mapping) - goto put_out; - ret = -ENOMEM; new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); if (!new_page) -- cgit v1.2.3 From c517ee744b96e441d9c731e245f83c6d08dc0a19 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 29 Jul 2012 20:22:16 +0200 Subject: uprobes: __replace_page() should not use page_address_in_vma() page_address_in_vma(old_page) in __replace_page() is ugly and wrong. The caller already knows the correct virtual address, this page was found by get_user_pages(vaddr). However, page_address_in_vma() can actually fail if page->mapping was cleared by __delete_from_page_cache() after get_user_pages() returns. But this means the race with page reclaim, write_opcode() should not fail, it should retry and read this page again. Probably the race with remove_mapping() is not possible due to page_freeze_refs() logic, but afaics at least shmem_writepage()->shmem_delete_from_page_cache() can clear ->mapping. We could change __replace_page() to return -EAGAIN in this case, but it would be better to simply use the caller's vaddr and rely on page_check_address(). Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Cc: Anton Arapov Cc: Srikar Dronamraju Link: http://lkml.kernel.org/r/20120729182216.GA20311@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index a2b32a51d0a2..6fda7996892b 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -127,22 +127,19 @@ static loff_t vma_address(struct vm_area_struct *vma, loff_t offset) * based on replace_page in mm/ksm.c * * @vma: vma that holds the pte pointing to page + * @addr: address the old @page is mapped at * @page: the cowed page we are replacing by kpage * @kpage: the modified page we replace page by * * Returns 0 on success, -EFAULT on failure. */ -static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage) +static int __replace_page(struct vm_area_struct *vma, unsigned long addr, + struct page *page, struct page *kpage) { struct mm_struct *mm = vma->vm_mm; - unsigned long addr; spinlock_t *ptl; pte_t *ptep; - addr = page_address_in_vma(page, vma); - if (addr == -EFAULT) - return -EFAULT; - ptep = page_check_address(page, mm, addr, &ptl, 0); if (!ptep) return -EAGAIN; @@ -243,7 +240,7 @@ retry: goto unlock_out; lock_page(new_page); - ret = __replace_page(vma, old_page, new_page); + ret = __replace_page(vma, vaddr, old_page, new_page); unlock_page(new_page); unlock_out: -- cgit v1.2.3 From 089ba999dc881a7549d97c55ac9e0052d061867d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 29 Jul 2012 20:22:18 +0200 Subject: uprobes: Kill write_opcode()->lock_page(new_page) write_opcode() does lock_page(new_page) for no reason. Nobody can see this page until __replace_page() exposes it under ptl lock, and we do nothing with this page after pte_unmap_unlock(). If nothing else, the similar code in do_wp_page() doesn't lock the new page for page_add_new_anon_rmap/set_pte_at_notify. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Cc: Anton Arapov Cc: Srikar Dronamraju Link: http://lkml.kernel.org/r/20120729182218.GA20315@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 6fda7996892b..23c562b7fc2e 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -239,9 +239,7 @@ retry: if (ret) goto unlock_out; - lock_page(new_page); ret = __replace_page(vma, vaddr, old_page, new_page); - unlock_page(new_page); unlock_out: unlock_page(old_page); -- cgit v1.2.3 From 9f92448ceeea5326db7d114005a7e7ac03904edf Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 29 Jul 2012 20:22:20 +0200 Subject: uprobes: Clean up and document write_opcode()->lock_page(old_page) The comment above write_opcode()->lock_page(old_page) tells about the race with do_wp_page(). I don't really understand which exactly race it means, but afaics this lock_page() was not enough to close all races with do_wp_page(). Anyway, since: 77fc4af1b59d uprobes: Change register_for_each_vma() to take mm->mmap_sem for writing this code is always called with ->mmap_sem held for writing, so we can forget about do_wp_page(). However, we can't simply remove this lock_page(), and the only (afaics) reason is __replace_page()->try_to_free_swap(). Nothing in write_opcode() needs it, move it into __replace_page() and fix the comment. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Cc: Anton Arapov Cc: Srikar Dronamraju Link: http://lkml.kernel.org/r/20120729182220.GA20322@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 23c562b7fc2e..5db150b306d2 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -139,10 +139,15 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, struct mm_struct *mm = vma->vm_mm; spinlock_t *ptl; pte_t *ptep; + int err; + /* freeze PageSwapCache() for try_to_free_swap() below */ + lock_page(page); + + err = -EAGAIN; ptep = page_check_address(page, mm, addr, &ptl, 0); if (!ptep) - return -EAGAIN; + goto unlock; get_page(kpage); page_add_new_anon_rmap(kpage, vma, addr); @@ -162,7 +167,10 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, put_page(page); pte_unmap_unlock(ptep, ptl); - return 0; + err = 0; + unlock: + unlock_page(page); + return err; } /** @@ -216,15 +224,10 @@ retry: ret = -ENOMEM; new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); if (!new_page) - goto put_out; + goto put_old; __SetPageUptodate(new_page); - /* - * lock page will serialize against do_wp_page()'s - * PageAnon() handling - */ - lock_page(old_page); /* copy the page now that we've got it stable */ vaddr_old = kmap_atomic(old_page); vaddr_new = kmap_atomic(new_page); @@ -237,15 +240,13 @@ retry: ret = anon_vma_prepare(vma); if (ret) - goto unlock_out; + goto put_new; ret = __replace_page(vma, vaddr, old_page, new_page); -unlock_out: - unlock_page(old_page); +put_new: page_cache_release(new_page); - -put_out: +put_old: put_page(old_page); if (unlikely(ret == -EAGAIN)) -- cgit v1.2.3 From 665605a2a207dbe1fa429b474f932d6ea138ba92 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 29 Jul 2012 20:22:29 +0200 Subject: uprobes: Uprobe_mmap/munmap needs list_for_each_entry_safe() The bug was introduced by me in 449d0d7c ("uprobes: Simplify the usage of uprobe->pending_list"). Yes, we do not care about uprobe->pending_list after return and nobody can remove the current list entry, but put_uprobe(uprobe) can actually free it and thus we need list_for_each_safe(). Reported-by: Srikar Dronamraju Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Cc: Anton Arapov Link: http://lkml.kernel.org/r/20120729182229.GA20329@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 5db150b306d2..bed2161620d7 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1010,7 +1010,7 @@ static void build_probe_list(struct inode *inode, struct list_head *head) int uprobe_mmap(struct vm_area_struct *vma) { struct list_head tmp_list; - struct uprobe *uprobe; + struct uprobe *uprobe, *u; struct inode *inode; int ret, count; @@ -1028,7 +1028,7 @@ int uprobe_mmap(struct vm_area_struct *vma) ret = 0; count = 0; - list_for_each_entry(uprobe, &tmp_list, pending_list) { + list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { if (!ret) { loff_t vaddr = vma_address(vma, uprobe->offset); @@ -1076,7 +1076,7 @@ int uprobe_mmap(struct vm_area_struct *vma) void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) { struct list_head tmp_list; - struct uprobe *uprobe; + struct uprobe *uprobe, *u; struct inode *inode; if (!atomic_read(&uprobe_events) || !valid_vma(vma, false)) @@ -1093,7 +1093,7 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon mutex_lock(uprobes_mmap_hash(inode)); build_probe_list(inode, &tmp_list); - list_for_each_entry(uprobe, &tmp_list, pending_list) { + list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { loff_t vaddr = vma_address(vma, uprobe->offset); if (vaddr >= start && vaddr < end) { -- cgit v1.2.3 From 2fd611a991391a6050cbd139201a2e12fc306540 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 29 Jul 2012 20:22:31 +0200 Subject: uprobes: Suppress uprobe_munmap() from mmput() uprobe_munmap() does get_user_pages() and it is also called from the final mmput()->exit_mmap() path. This slows down exit/mmput() for no reason, and I think it is simply dangerous/wrong to try to fault-in a page into the dying mm. If nothing else, this happens after the last sync_mm_rss(), afaics handle_mm_fault() can change the task->rss_stat and make the subsequent check_mm() unhappy. Change uprobe_munmap() to check mm->mm_users != 0. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Cc: Anton Arapov Cc: Srikar Dronamraju Link: http://lkml.kernel.org/r/20120729182231.GA20336@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index bed2161620d7..9db9cdf8ff34 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1082,6 +1082,9 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon if (!atomic_read(&uprobe_events) || !valid_vma(vma, false)) return; + if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */ + return; + if (!atomic_read(&vma->vm_mm->uprobes_state.count)) return; -- cgit v1.2.3 From aefd8933d445abf7ff0d4027c624737898827bcd Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 29 Jul 2012 20:22:33 +0200 Subject: uprobes: Fix overflow in vma_address()/find_active_uprobe() vma->vm_pgoff is "unsigned long", it should be promoted to loff_t before the multiplication to avoid the overflow. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Cc: Anton Arapov Cc: Srikar Dronamraju Link: http://lkml.kernel.org/r/20120729182233.GA20339@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 9db9cdf8ff34..fb961d5e205c 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -117,7 +117,7 @@ static loff_t vma_address(struct vm_area_struct *vma, loff_t offset) loff_t vaddr; vaddr = vma->vm_start + offset; - vaddr -= vma->vm_pgoff << PAGE_SHIFT; + vaddr -= (loff_t)vma->vm_pgoff << PAGE_SHIFT; return vaddr; } @@ -1450,7 +1450,7 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) inode = vma->vm_file->f_mapping->host; offset = bp_vaddr - vma->vm_start; - offset += (vma->vm_pgoff << PAGE_SHIFT); + offset += (loff_t)vma->vm_pgoff << PAGE_SHIFT; uprobe = find_uprobe(inode, offset); } -- cgit v1.2.3 From 891c39708144bbe401b4aa151bffd0fe41b1dafd Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 29 Jul 2012 20:22:40 +0200 Subject: uprobes: Teach build_probe_list() to consider the range Currently build_probe_list() builds the list of all uprobes attached to the given inode, and the caller should filter out those who don't fall into the [start,end) range, this is sub-optimal. This patch turns find_least_offset_node() into find_node_in_range() which returns the first node inside the [min,max] range, and changes build_probe_list() to use this node as a starting point for rb_prev() and rb_next() to find all other nodes the caller needs. The resulting list is no longer sorted but we do not care. This can speed up both build_probe_list() and the callers, but there is another reason to introduce find_node_in_range(). It can be used to figure out whether the given vma has uprobes or not, this will be needed soon. While at it, shift INIT_LIST_HEAD(tmp_list) into build_probe_list(). Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Cc: Anton Arapov Cc: Srikar Dronamraju Link: http://lkml.kernel.org/r/20120729182240.GA20352@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 103 +++++++++++++++++++++++------------------------- 1 file changed, 50 insertions(+), 53 deletions(-) (limited to 'kernel') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index fb961d5e205c..2ef123306183 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -939,59 +939,66 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume put_uprobe(uprobe); } -/* - * Of all the nodes that correspond to the given inode, return the node - * with the least offset. - */ -static struct rb_node *find_least_offset_node(struct inode *inode) +static struct rb_node * +find_node_in_range(struct inode *inode, loff_t min, loff_t max) { - struct uprobe u = { .inode = inode, .offset = 0}; struct rb_node *n = uprobes_tree.rb_node; - struct rb_node *close_node = NULL; - struct uprobe *uprobe; - int match; while (n) { - uprobe = rb_entry(n, struct uprobe, rb_node); - match = match_uprobe(&u, uprobe); - - if (uprobe->inode == inode) - close_node = n; + struct uprobe *u = rb_entry(n, struct uprobe, rb_node); - if (!match) - return close_node; - - if (match < 0) + if (inode < u->inode) { n = n->rb_left; - else + } else if (inode > u->inode) { n = n->rb_right; + } else { + if (max < u->offset) + n = n->rb_left; + else if (min > u->offset) + n = n->rb_right; + else + break; + } } - return close_node; + return n; } /* - * For a given inode, build a list of probes that need to be inserted. + * For a given range in vma, build a list of probes that need to be inserted. */ -static void build_probe_list(struct inode *inode, struct list_head *head) +static void build_probe_list(struct inode *inode, + struct vm_area_struct *vma, + unsigned long start, unsigned long end, + struct list_head *head) { - struct uprobe *uprobe; + loff_t min, max; unsigned long flags; - struct rb_node *n; - - spin_lock_irqsave(&uprobes_treelock, flags); - - n = find_least_offset_node(inode); + struct rb_node *n, *t; + struct uprobe *u; - for (; n; n = rb_next(n)) { - uprobe = rb_entry(n, struct uprobe, rb_node); - if (uprobe->inode != inode) - break; + INIT_LIST_HEAD(head); + min = ((loff_t)vma->vm_pgoff << PAGE_SHIFT) + start - vma->vm_start; + max = min + (end - start) - 1; - list_add(&uprobe->pending_list, head); - atomic_inc(&uprobe->ref); + spin_lock_irqsave(&uprobes_treelock, flags); + n = find_node_in_range(inode, min, max); + if (n) { + for (t = n; t; t = rb_prev(t)) { + u = rb_entry(t, struct uprobe, rb_node); + if (u->inode != inode || u->offset < min) + break; + list_add(&u->pending_list, head); + atomic_inc(&u->ref); + } + for (t = n; (t = rb_next(t)); ) { + u = rb_entry(t, struct uprobe, rb_node); + if (u->inode != inode || u->offset > max) + break; + list_add(&u->pending_list, head); + atomic_inc(&u->ref); + } } - spin_unlock_irqrestore(&uprobes_treelock, flags); } @@ -1021,9 +1028,8 @@ int uprobe_mmap(struct vm_area_struct *vma) if (!inode) return 0; - INIT_LIST_HEAD(&tmp_list); mutex_lock(uprobes_mmap_hash(inode)); - build_probe_list(inode, &tmp_list); + build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list); ret = 0; count = 0; @@ -1032,11 +1038,6 @@ int uprobe_mmap(struct vm_area_struct *vma) if (!ret) { loff_t vaddr = vma_address(vma, uprobe->offset); - if (vaddr < vma->vm_start || vaddr >= vma->vm_end) { - put_uprobe(uprobe); - continue; - } - ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); /* * We can race against uprobe_register(), see the @@ -1092,21 +1093,17 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon if (!inode) return; - INIT_LIST_HEAD(&tmp_list); mutex_lock(uprobes_mmap_hash(inode)); - build_probe_list(inode, &tmp_list); + build_probe_list(inode, vma, start, end, &tmp_list); list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { loff_t vaddr = vma_address(vma, uprobe->offset); - - if (vaddr >= start && vaddr < end) { - /* - * An unregister could have removed the probe before - * unmap. So check before we decrement the count. - */ - if (is_swbp_at_addr(vma->vm_mm, vaddr) == 1) - atomic_dec(&vma->vm_mm->uprobes_state.count); - } + /* + * An unregister could have removed the probe before + * unmap. So check before we decrement the count. + */ + if (is_swbp_at_addr(vma->vm_mm, vaddr) == 1) + atomic_dec(&vma->vm_mm->uprobes_state.count); put_uprobe(uprobe); } mutex_unlock(uprobes_mmap_hash(inode)); -- cgit v1.2.3 From cb113b47d098185f3f1f67e8300d05ddce842b66 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 29 Jul 2012 20:22:42 +0200 Subject: uprobes: Introduce vaddr_to_offset(vma, vaddr) Add the new helper, vaddr_to_offset(vma, vaddr) which returns the offset in vma->vm_file this vaddr is mapped at. Change build_probe_list() and find_active_uprobe() to use the new helper, the next patch adds another user. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Cc: Anton Arapov Cc: Srikar Dronamraju Link: http://lkml.kernel.org/r/20120729182242.GA20355@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 2ef123306183..b03256cced52 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -122,6 +122,11 @@ static loff_t vma_address(struct vm_area_struct *vma, loff_t offset) return vaddr; } +static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr) +{ + return ((loff_t)vma->vm_pgoff << PAGE_SHIFT) + (vaddr - vma->vm_start); +} + /** * __replace_page - replace page in vma by new page. * based on replace_page in mm/ksm.c @@ -978,7 +983,7 @@ static void build_probe_list(struct inode *inode, struct uprobe *u; INIT_LIST_HEAD(head); - min = ((loff_t)vma->vm_pgoff << PAGE_SHIFT) + start - vma->vm_start; + min = vaddr_to_offset(vma, start); max = min + (end - start) - 1; spin_lock_irqsave(&uprobes_treelock, flags); @@ -1442,12 +1447,9 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) vma = find_vma(mm, bp_vaddr); if (vma && vma->vm_start <= bp_vaddr) { if (valid_vma(vma, false)) { - struct inode *inode; - loff_t offset; + struct inode *inode = vma->vm_file->f_mapping->host; + loff_t offset = vaddr_to_offset(vma, bp_vaddr); - inode = vma->vm_file->f_mapping->host; - offset = bp_vaddr - vma->vm_start; - offset += (loff_t)vma->vm_pgoff << PAGE_SHIFT; uprobe = find_uprobe(inode, offset); } -- cgit v1.2.3 From f4d6dfe55115efe981b4b5f37183ddccaaa792f0 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 29 Jul 2012 20:22:44 +0200 Subject: uprobes: Fix register_for_each_vma()->vma_address() check 1. register_for_each_vma() checks that vma_address() == vaddr, but this is not enough. We should also ensure that vaddr >= vm_start, find_vma() guarantees "vaddr < vm_end" only. 2. After the prevous changes, register_for_each_vma() is the only reason why vma_address() has to return loff_t, all other users know that we have the valid mapping at this offset and thus the overflow is not possible. Change the code to use vaddr_to_offset() instead, imho this looks more clean/understandable and now we can change vma_address(). 3. While at it, remove the unnecessary type-cast. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Cc: Anton Arapov Cc: Srikar Dronamraju Link: http://lkml.kernel.org/r/20120729182244.GA20362@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index b03256cced52..cdc3c951251c 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -823,12 +823,13 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) goto free; down_write(&mm->mmap_sem); - vma = find_vma(mm, (unsigned long)info->vaddr); - if (!vma || !valid_vma(vma, is_register)) + vma = find_vma(mm, info->vaddr); + if (!vma || !valid_vma(vma, is_register) || + vma->vm_file->f_mapping->host != uprobe->inode) goto unlock; - if (vma->vm_file->f_mapping->host != uprobe->inode || - vma_address(vma, uprobe->offset) != info->vaddr) + if (vma->vm_start > info->vaddr || + vaddr_to_offset(vma, info->vaddr) != uprobe->offset) goto unlock; if (is_register) { -- cgit v1.2.3 From 57683f72b8c01c53c85fe13e82fe1feb3a06bcd5 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 29 Jul 2012 20:22:47 +0200 Subject: uprobes: Rename vma_address() and make it return "unsigned long" 1. vma_address() returns loff_t, this looks confusing and this is unnecessary after the previous change. Make it return "ulong", all callers truncate the result anyway. 2. Its name conflicts with mm/rmap.c:vma_address(), rename it to offset_to_vaddr(), this matches vaddr_to_offset(). Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Cc: Anton Arapov Cc: Srikar Dronamraju Link: http://lkml.kernel.org/r/20120729182247.GA20365@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index cdc3c951251c..bb30a4fc5050 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -112,14 +112,9 @@ static bool valid_vma(struct vm_area_struct *vma, bool is_register) return false; } -static loff_t vma_address(struct vm_area_struct *vma, loff_t offset) +static unsigned long offset_to_vaddr(struct vm_area_struct *vma, loff_t offset) { - loff_t vaddr; - - vaddr = vma->vm_start + offset; - vaddr -= (loff_t)vma->vm_pgoff << PAGE_SHIFT; - - return vaddr; + return vma->vm_start + offset - ((loff_t)vma->vm_pgoff << PAGE_SHIFT); } static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr) @@ -775,7 +770,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register) curr = info; info->mm = vma->vm_mm; - info->vaddr = vma_address(vma, offset); + info->vaddr = offset_to_vaddr(vma, offset); } mutex_unlock(&mapping->i_mmap_mutex); @@ -1042,7 +1037,7 @@ int uprobe_mmap(struct vm_area_struct *vma) list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { if (!ret) { - loff_t vaddr = vma_address(vma, uprobe->offset); + unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); /* @@ -1103,7 +1098,7 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon build_probe_list(inode, vma, start, end, &tmp_list); list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { - loff_t vaddr = vma_address(vma, uprobe->offset); + unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); /* * An unregister could have removed the probe before * unmap. So check before we decrement the count. -- cgit v1.2.3 From 194f8dcbe9629d8e9346cf96345a9c0bbf0e67ae Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 29 Jul 2012 20:22:49 +0200 Subject: uprobes: __replace_page() needs munlock_vma_page() Like do_wp_page(), __replace_page() should do munlock_vma_page() for the case when the old page still has other !VM_LOCKED mappings. Unfortunately this needs mm/internal.h. Also, move put_page() outside of ptl lock. This doesn't really matter but looks a bit better. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Cc: Anton Arapov Cc: Srikar Dronamraju Link: http://lkml.kernel.org/r/20120729182249.GA20372@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index bb30a4fc5050..c08a22d02f72 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -32,6 +32,7 @@ #include /* try_to_free_swap */ #include /* user_enable_single_step */ #include /* notifier mechanism */ +#include "../../mm/internal.h" /* munlock_vma_page */ #include @@ -141,7 +142,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep; int err; - /* freeze PageSwapCache() for try_to_free_swap() below */ + /* For try_to_free_swap() and munlock_vma_page() below */ lock_page(page); err = -EAGAIN; @@ -164,9 +165,12 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, page_remove_rmap(page); if (!page_mapped(page)) try_to_free_swap(page); - put_page(page); pte_unmap_unlock(ptep, ptl); + if (vma->vm_flags & VM_LOCKED) + munlock_vma_page(page); + put_page(page); + err = 0; unlock: unlock_page(page); -- cgit v1.2.3