summaryrefslogtreecommitdiff
path: root/kernel/events/uprobes.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/events/uprobes.c')
-rw-r--r--kernel/events/uprobes.c60
1 files changed, 46 insertions, 14 deletions
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 2ca797cbe465..8d783b5882b6 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -173,6 +173,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
DEFINE_FOLIO_VMA_WALK(pvmw, old_folio, vma, addr, 0);
int err;
struct mmu_notifier_range range;
+ pte_t pte;
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, addr,
addr + PAGE_SIZE);
@@ -192,6 +193,16 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
if (!page_vma_mapped_walk(&pvmw))
goto unlock;
VM_BUG_ON_PAGE(addr != pvmw.address, old_page);
+ pte = ptep_get(pvmw.pte);
+
+ /*
+ * Handle PFN swap PTES, such as device-exclusive ones, that actually
+ * map pages: simply trigger GUP again to fix it up.
+ */
+ if (unlikely(!pte_present(pte))) {
+ page_vma_mapped_walk_done(&pvmw);
+ goto unlock;
+ }
if (new_page) {
folio_get(new_folio);
@@ -206,7 +217,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
inc_mm_counter(mm, MM_ANONPAGES);
}
- flush_cache_page(vma, addr, pte_pfn(ptep_get(pvmw.pte)));
+ flush_cache_page(vma, addr, pte_pfn(pte));
ptep_clear_flush(vma, addr, pvmw.pte);
if (new_page)
set_pte_at(mm, addr, pvmw.pte,
@@ -417,7 +428,7 @@ static void update_ref_ctr_warn(struct uprobe *uprobe,
struct mm_struct *mm, short d)
{
pr_warn("ref_ctr %s failed for inode: 0x%lx offset: "
- "0x%llx ref_ctr_offset: 0x%llx of mm: 0x%pK\n",
+ "0x%llx ref_ctr_offset: 0x%llx of mm: 0x%p\n",
d > 0 ? "increment" : "decrement", uprobe->inode->i_ino,
(unsigned long long) uprobe->offset,
(unsigned long long) uprobe->ref_ctr_offset, mm);
@@ -495,6 +506,11 @@ retry:
if (ret <= 0)
goto put_old;
+ if (is_zero_page(old_page)) {
+ ret = -EINVAL;
+ goto put_old;
+ }
+
if (WARN(!is_register && PageCompound(old_page),
"uprobe unregister should never work on compound page\n")) {
ret = -EINVAL;
@@ -762,10 +778,14 @@ static struct uprobe *hprobe_expire(struct hprobe *hprobe, bool get)
enum hprobe_state hstate;
/*
- * return_instance's hprobe is protected by RCU.
- * Underlying uprobe is itself protected from reuse by SRCU.
+ * Caller should guarantee that return_instance is not going to be
+ * freed from under us. This can be achieved either through holding
+ * rcu_read_lock() or by owning return_instance in the first place.
+ *
+ * Underlying uprobe is itself protected from reuse by SRCU, so ensure
+ * SRCU lock is held properly.
*/
- lockdep_assert(rcu_read_lock_held() && srcu_read_lock_held(&uretprobes_srcu));
+ lockdep_assert(srcu_read_lock_held(&uretprobes_srcu));
hstate = READ_ONCE(hprobe->state);
switch (hstate) {
@@ -1683,7 +1703,8 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
}
vma = _install_special_mapping(mm, area->vaddr, PAGE_SIZE,
- VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO,
+ VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO|
+ VM_SEALED_SYSMAP,
&xol_mapping);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
@@ -1935,6 +1956,9 @@ static void free_ret_instance(struct uprobe_task *utask,
* to-be-reused return instances for future uretprobes. If ri_timer()
* happens to be running right now, though, we fallback to safety and
* just perform RCU-delated freeing of ri.
+ * Admittedly, this is a rather simple use of seqcount, but it nicely
+ * abstracts away all the necessary memory barriers, so we use
+ * a well-supported kernel primitive here.
*/
if (raw_seqcount_try_begin(&utask->ri_seqcount, seq)) {
/* immediate reuse of ri without RCU GP is OK */
@@ -1995,12 +2019,20 @@ static void ri_timer(struct timer_list *timer)
/* RCU protects return_instance from freeing. */
guard(rcu)();
- write_seqcount_begin(&utask->ri_seqcount);
+ /*
+ * See free_ret_instance() for notes on seqcount use.
+ * We also employ raw API variants to avoid lockdep false-positive
+ * warning complaining about enabled preemption. The timer can only be
+ * invoked once for a uprobe_task. Therefore there can only be one
+ * writer. The reader does not require an even sequence count to make
+ * progress, so it is OK to remain preemptible on PREEMPT_RT.
+ */
+ raw_write_seqcount_begin(&utask->ri_seqcount);
for_each_ret_instance_rcu(ri, utask->return_instances)
hprobe_expire(&ri->hprobe, false);
- write_seqcount_end(&utask->ri_seqcount);
+ raw_write_seqcount_end(&utask->ri_seqcount);
}
static struct uprobe_task *alloc_utask(void)
@@ -2160,8 +2192,8 @@ void uprobe_copy_process(struct task_struct *t, unsigned long flags)
*/
unsigned long uprobe_get_trampoline_vaddr(void)
{
+ unsigned long trampoline_vaddr = UPROBE_NO_TRAMPOLINE_VADDR;
struct xol_area *area;
- unsigned long trampoline_vaddr = -1;
/* Pairs with xol_add_vma() smp_store_release() */
area = READ_ONCE(current->mm->uprobes_state.xol_area); /* ^^^ */
@@ -2302,9 +2334,8 @@ bool uprobe_deny_signal(void)
WARN_ON_ONCE(utask->state != UTASK_SSTEP);
if (task_sigpending(t)) {
- spin_lock_irq(&t->sighand->siglock);
+ utask->signal_denied = true;
clear_tsk_thread_flag(t, TIF_SIGPENDING);
- spin_unlock_irq(&t->sighand->siglock);
if (__fatal_signal_pending(t) || arch_uprobe_xol_was_trapped(t)) {
utask->state = UTASK_SSTEP_TRAPPED;
@@ -2737,9 +2768,10 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
utask->state = UTASK_RUNNING;
xol_free_insn_slot(utask);
- spin_lock_irq(&current->sighand->siglock);
- recalc_sigpending(); /* see uprobe_deny_signal() */
- spin_unlock_irq(&current->sighand->siglock);
+ if (utask->signal_denied) {
+ set_thread_flag(TIF_SIGPENDING);
+ utask->signal_denied = false;
+ }
if (unlikely(err)) {
uprobe_warn(current, "execute the probed insn, sending SIGILL.");