diff options
Diffstat (limited to 'kernel/futex')
| -rw-r--r-- | kernel/futex/core.c | 460 | ||||
| -rw-r--r-- | kernel/futex/futex.h | 52 | ||||
| -rw-r--r-- | kernel/futex/pi.c | 62 | ||||
| -rw-r--r-- | kernel/futex/requeue.c | 20 | ||||
| -rw-r--r-- | kernel/futex/syscalls.c | 36 | ||||
| -rw-r--r-- | kernel/futex/waitwake.c | 47 |
6 files changed, 387 insertions, 290 deletions
diff --git a/kernel/futex/core.c b/kernel/futex/core.c index ff2a4fb2993f..179b26e9c934 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -32,18 +32,21 @@ * "But they come in a choice of three flavours!" */ #include <linux/compat.h> -#include <linux/jhash.h> -#include <linux/pagemap.h> #include <linux/debugfs.h> -#include <linux/plist.h> +#include <linux/fault-inject.h> #include <linux/gfp.h> -#include <linux/vmalloc.h> +#include <linux/jhash.h> #include <linux/memblock.h> -#include <linux/fault-inject.h> -#include <linux/slab.h> -#include <linux/prctl.h> #include <linux/mempolicy.h> #include <linux/mmap_lock.h> +#include <linux/pagemap.h> +#include <linux/plist.h> +#include <linux/prctl.h> +#include <linux/rseq.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> + +#include <vdso/futex.h> #include "futex.h" #include "../locking/rtmutex_common.h" @@ -124,7 +127,7 @@ late_initcall(fail_futex_debugfs); #endif /* CONFIG_FAIL_FUTEX */ static struct futex_hash_bucket * -__futex_hash(union futex_key *key, struct futex_private_hash *fph); +__futex_hash(union futex_key *key, struct futex_private_hash *fph, struct futex_private_hash **fph_p); #ifdef CONFIG_FUTEX_PRIVATE_HASH static bool futex_ref_get(struct futex_private_hash *fph); @@ -133,15 +136,6 @@ static bool futex_ref_is_dead(struct futex_private_hash *fph); enum { FR_PERCPU = 0, FR_ATOMIC }; -static inline bool futex_key_is_private(union futex_key *key) -{ - /* - * Relies on get_futex_key() to set either bit for shared - * futexes -- see comment with union futex_key. - */ - return !(key->both.offset & (FUT_OFF_INODE | FUT_OFF_MMSHARED)); -} - static bool futex_private_hash_get(struct futex_private_hash *fph) { return futex_ref_get(fph); @@ -149,51 +143,18 @@ static bool futex_private_hash_get(struct futex_private_hash *fph) void futex_private_hash_put(struct futex_private_hash *fph) { - if (futex_ref_put(fph)) + if (fph && futex_ref_put(fph)) wake_up_var(fph->mm); } -/** - * futex_hash_get - Get an additional reference for the local hash. - * @hb: ptr to the private local hash. - * - * Obtain an additional reference for the already obtained hash bucket. The - * caller must already own an reference. - */ -void futex_hash_get(struct futex_hash_bucket *hb) -{ - struct futex_private_hash *fph = hb->priv; - - if (!fph) - return; - WARN_ON_ONCE(!futex_private_hash_get(fph)); -} - -void futex_hash_put(struct futex_hash_bucket *hb) -{ - struct futex_private_hash *fph = hb->priv; - - if (!fph) - return; - futex_private_hash_put(fph); -} - static struct futex_hash_bucket * __futex_hash_private(union futex_key *key, struct futex_private_hash *fph) { u32 hash; - if (!futex_key_is_private(key)) - return NULL; - - if (!fph) - fph = rcu_dereference(key->private.mm->futex_phash); - if (!fph || !fph->hash_mask) - return NULL; - - hash = jhash2((void *)&key->private.address, - sizeof(key->private.address) / 4, + hash = jhash2((void *)&key->private.address, sizeof(key->private.address) / 4, key->both.offset); + return &fph->queues[hash & fph->hash_mask]; } @@ -211,13 +172,12 @@ static void futex_rehash_private(struct futex_private_hash *old, spin_lock(&hb_old->lock); plist_for_each_entry_safe(this, tmp, &hb_old->chain, list) { - plist_del(&this->list, &hb_old->chain); futex_hb_waiters_dec(hb_old); WARN_ON_ONCE(this->lock_ptr != &hb_old->lock); - hb_new = __futex_hash(&this->key, new); + hb_new = __futex_hash(&this->key, new, NULL); futex_hb_waiters_inc(hb_new); /* * The new pointer isn't published yet but an already @@ -232,18 +192,17 @@ static void futex_rehash_private(struct futex_private_hash *old, } } -static bool __futex_pivot_hash(struct mm_struct *mm, - struct futex_private_hash *new) +static bool __futex_pivot_hash(struct mm_struct *mm, struct futex_private_hash *new) { + struct futex_mm_phash *mmph = &mm->futex.phash; struct futex_private_hash *fph; - WARN_ON_ONCE(mm->futex_phash_new); + WARN_ON_ONCE(mmph->hash_new); - fph = rcu_dereference_protected(mm->futex_phash, - lockdep_is_held(&mm->futex_hash_lock)); + fph = rcu_dereference_protected(mmph->hash, lockdep_is_held(&mmph->lock)); if (fph) { if (!futex_ref_is_dead(fph)) { - mm->futex_phash_new = new; + mmph->hash_new = new; return false; } @@ -251,8 +210,8 @@ static bool __futex_pivot_hash(struct mm_struct *mm, } new->state = FR_PERCPU; scoped_guard(rcu) { - mm->futex_batches = get_state_synchronize_rcu(); - rcu_assign_pointer(mm->futex_phash, new); + mmph->batches = get_state_synchronize_rcu(); + rcu_assign_pointer(mmph->hash, new); } kvfree_rcu(fph, rcu); return true; @@ -260,20 +219,19 @@ static bool __futex_pivot_hash(struct mm_struct *mm, static void futex_pivot_hash(struct mm_struct *mm) { - scoped_guard(mutex, &mm->futex_hash_lock) { + scoped_guard(mutex, &mm->futex.phash.lock) { struct futex_private_hash *fph; - fph = mm->futex_phash_new; + fph = mm->futex.phash.hash_new; if (fph) { - mm->futex_phash_new = NULL; + mm->futex.phash.hash_new = NULL; __futex_pivot_hash(mm, fph); } } } -struct futex_private_hash *futex_private_hash(void) +struct futex_private_hash *futex_private_hash(struct mm_struct *mm) { - struct mm_struct *mm = current->mm; /* * Ideally we don't loop. If there is a replacement in progress * then a new private hash is already prepared and a reference can't be @@ -288,7 +246,7 @@ again: scoped_guard(rcu) { struct futex_private_hash *fph; - fph = rcu_dereference(mm->futex_phash); + fph = rcu_dereference(mm->futex.phash.hash); if (!fph) return NULL; @@ -299,18 +257,17 @@ again: goto again; } -struct futex_hash_bucket *futex_hash(union futex_key *key) +struct futex_bucket_ref futex_hash(union futex_key *key) { - struct futex_private_hash *fph; - struct futex_hash_bucket *hb; - again: scoped_guard(rcu) { - hb = __futex_hash(key, NULL); - fph = hb->priv; + struct futex_private_hash *fph = NULL; + struct futex_hash_bucket *hb; + + hb = __futex_hash(key, NULL, &fph); if (!fph || futex_private_hash_get(fph)) - return hb; + return (struct futex_bucket_ref){ .hb = hb, .fph = fph }; } futex_pivot_hash(key->private.mm); goto again; @@ -318,15 +275,9 @@ again: #else /* !CONFIG_FUTEX_PRIVATE_HASH */ -static struct futex_hash_bucket * -__futex_hash_private(union futex_key *key, struct futex_private_hash *fph) +struct futex_bucket_ref futex_hash(union futex_key *key) { - return NULL; -} - -struct futex_hash_bucket *futex_hash(union futex_key *key) -{ - return __futex_hash(key, NULL); + return (struct futex_bucket_ref){ .hb = __futex_hash(key, NULL, NULL), .fph = NULL }; } #endif /* CONFIG_FUTEX_PRIVATE_HASH */ @@ -404,6 +355,8 @@ static int futex_mpol(struct mm_struct *mm, unsigned long addr) * __futex_hash - Return the hash bucket * @key: Pointer to the futex key for which the hash is calculated * @fph: Pointer to private hash if known + * @fph_p: Pointer to a private hash pointer; output for the private hash + * used when set. * * We hash on the keys returned from get_futex_key (see below) and return the * corresponding hash bucket. @@ -412,21 +365,24 @@ static int futex_mpol(struct mm_struct *mm, unsigned long addr) * global hash is returned. */ static struct futex_hash_bucket * -__futex_hash(union futex_key *key, struct futex_private_hash *fph) +__futex_hash(union futex_key *key, struct futex_private_hash *fph, struct futex_private_hash **fph_p) { int node = key->both.node; u32 hash; - if (node == FUTEX_NO_NODE) { - struct futex_hash_bucket *hb; - - hb = __futex_hash_private(key, fph); - if (hb) - return hb; +#ifdef CONFIG_FUTEX_PRIVATE_HASH + if (node == FUTEX_NO_NODE && futex_key_is_private(key)) { + if (!fph) + fph = rcu_dereference(key->private.mm->futex.phash.hash); + if (fph && fph->hash_mask) { + if (fph_p) + *fph_p = fph; + return __futex_hash_private(key, fph); + } } +#endif - hash = jhash2((u32 *)key, - offsetof(typeof(*key), both.offset) / sizeof(u32), + hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / sizeof(u32), key->both.offset); if (node == FUTEX_NO_NODE) { @@ -441,8 +397,7 @@ __futex_hash(union futex_key *key, struct futex_private_hash *fph) */ node = (hash >> futex_hashshift) % nr_node_ids; if (!node_possible(node)) { - node = find_next_bit_wrap(node_possible_map.bits, - nr_node_ids, node); + node = find_next_bit_wrap(node_possible_map.bits, nr_node_ids, node); } } @@ -459,9 +414,8 @@ __futex_hash(union futex_key *key, struct futex_private_hash *fph) * Return: Initialized hrtimer_sleeper structure or NULL if no timeout * value given */ -struct hrtimer_sleeper * -futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout, - int flags, u64 range_ns) +struct hrtimer_sleeper *futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout, + int flags, u64 range_ns) { if (!time) return NULL; @@ -829,7 +783,7 @@ void wait_for_owner_exiting(int ret, struct task_struct *exiting) if (WARN_ON_ONCE(ret == -EBUSY && !exiting)) return; - mutex_lock(&exiting->futex_exit_mutex); + mutex_lock(&exiting->futex.exit_mutex); /* * No point in doing state checking here. If the waiter got here * while the task was in exec()->exec_futex_release() then it can @@ -838,7 +792,7 @@ void wait_for_owner_exiting(int ret, struct task_struct *exiting) * already. Highly unlikely and not a problem. Just one more round * through the futex maze. */ - mutex_unlock(&exiting->futex_exit_mutex); + mutex_unlock(&exiting->futex.exit_mutex); put_task_struct(exiting); } @@ -1012,8 +966,9 @@ void futex_unqueue_pi(struct futex_q *q) * dying task, and do notification if so: */ static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, - bool pi, bool pending_op) + unsigned int mod, bool pending_op) { + bool pi = !!(mod & FUTEX_ROBUST_MOD_PI); u32 uval, nval, mval; pid_t owner; int err; @@ -1047,7 +1002,7 @@ retry: * * In both cases the following conditions are met: * - * 1) task->robust_list->list_op_pending != NULL + * 1) task->futex.robust_list->list_op_pending != NULL * @pending_op == true * 2) The owner part of user space futex value == 0 * 3) Regular futex: @pi == false @@ -1065,7 +1020,7 @@ retry: owner = uval & FUTEX_TID_MASK; if (pending_op && !pi && !owner) { - futex_wake(uaddr, FLAGS_SIZE_32 | FLAGS_SHARED, 1, + futex_wake(uaddr, FLAGS_SIZE_32 | FLAGS_SHARED, NULL, 1, FUTEX_BITSET_MATCH_ANY); return 0; } @@ -1119,7 +1074,7 @@ retry: * PI futexes happens in exit_pi_state(): */ if (!pi && (uval & FUTEX_WAITERS)) { - futex_wake(uaddr, FLAGS_SIZE_32 | FLAGS_SHARED, 1, + futex_wake(uaddr, FLAGS_SIZE_32 | FLAGS_SHARED, NULL, 1, FUTEX_BITSET_MATCH_ANY); } @@ -1131,31 +1086,30 @@ retry: */ static inline int fetch_robust_entry(struct robust_list __user **entry, struct robust_list __user * __user *head, - unsigned int *pi) + unsigned int *mod) { unsigned long uentry; if (get_user(uentry, (unsigned long __user *)head)) return -EFAULT; - *entry = (void __user *)(uentry & ~1UL); - *pi = uentry & 1; + *entry = (void __user *)(uentry & ~FUTEX_ROBUST_MOD_MASK); + *mod = uentry & FUTEX_ROBUST_MOD_MASK; return 0; } /* - * Walk curr->robust_list (very carefully, it's a userspace list!) + * Walk curr->futex.robust_list (very carefully, it's a userspace list!) * and mark any locks found there dead, and notify any waiters. * * We silently return on any sign of list-walking problem. */ static void exit_robust_list(struct task_struct *curr) { - struct robust_list_head __user *head = curr->robust_list; + struct robust_list_head __user *head = curr->futex.robust_list; + unsigned int limit = ROBUST_LIST_LIMIT, cur_mod, next_mod, pend_mod; struct robust_list __user *entry, *next_entry, *pending; - unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; - unsigned int next_pi; unsigned long futex_offset; int rc; @@ -1163,7 +1117,7 @@ static void exit_robust_list(struct task_struct *curr) * Fetch the list head (which was registered earlier, via * sys_set_robust_list()): */ - if (fetch_robust_entry(&entry, &head->list.next, &pi)) + if (fetch_robust_entry(&entry, &head->list.next, &cur_mod)) return; /* * Fetch the relative futex offset: @@ -1174,7 +1128,7 @@ static void exit_robust_list(struct task_struct *curr) * Fetch any possibly pending lock-add first, and handle it * if it exists: */ - if (fetch_robust_entry(&pending, &head->list_op_pending, &pip)) + if (fetch_robust_entry(&pending, &head->list_op_pending, &pend_mod)) return; next_entry = NULL; /* avoid warning with gcc */ @@ -1183,20 +1137,20 @@ static void exit_robust_list(struct task_struct *curr) * Fetch the next entry in the list before calling * handle_futex_death: */ - rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi); + rc = fetch_robust_entry(&next_entry, &entry->next, &next_mod); /* * A pending lock might already be on the list, so * don't process it twice: */ if (entry != pending) { if (handle_futex_death((void __user *)entry + futex_offset, - curr, pi, HANDLE_DEATH_LIST)) + curr, cur_mod, HANDLE_DEATH_LIST)) return; } if (rc) return; entry = next_entry; - pi = next_pi; + cur_mod = next_mod; /* * Avoid excessively long or circular lists: */ @@ -1208,10 +1162,31 @@ static void exit_robust_list(struct task_struct *curr) if (pending) { handle_futex_death((void __user *)pending + futex_offset, - curr, pip, HANDLE_DEATH_PENDING); + curr, pend_mod, HANDLE_DEATH_PENDING); } } +static bool robust_list_clear_pending(unsigned long __user *pop) +{ + struct robust_list_head __user *head = current->futex.robust_list; + + if (!put_user(0UL, pop)) + return true; + + /* + * Just give up. The robust list head is usually part of TLS, so the + * chance that this gets resolved is close to zero. + * + * If @pop_addr is the robust_list_head::list_op_pending pointer then + * clear the robust list head pointer to prevent further damage when the + * task exits. Better a few stale futexes than corrupted memory. But + * that's mostly an academic exercise. + */ + if (pop == (unsigned long __user *)&head->list_op_pending) + current->futex.robust_list = NULL; + return false; +} + #ifdef CONFIG_COMPAT static void __user *futex_uaddr(struct robust_list __user *entry, compat_long_t futex_offset) @@ -1227,29 +1202,28 @@ static void __user *futex_uaddr(struct robust_list __user *entry, */ static inline int compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, - compat_uptr_t __user *head, unsigned int *pi) + compat_uptr_t __user *head, unsigned int *pflags) { if (get_user(*uentry, head)) return -EFAULT; - *entry = compat_ptr((*uentry) & ~1); - *pi = (unsigned int)(*uentry) & 1; + *entry = compat_ptr((*uentry) & ~FUTEX_ROBUST_MOD_MASK); + *pflags = (unsigned int)(*uentry) & FUTEX_ROBUST_MOD_MASK; return 0; } /* - * Walk curr->robust_list (very carefully, it's a userspace list!) + * Walk curr->futex.robust_list (very carefully, it's a userspace list!) * and mark any locks found there dead, and notify any waiters. * * We silently return on any sign of list-walking problem. */ static void compat_exit_robust_list(struct task_struct *curr) { - struct compat_robust_list_head __user *head = curr->compat_robust_list; + struct compat_robust_list_head __user *head = current->futex.compat_robust_list; + unsigned int limit = ROBUST_LIST_LIMIT, cur_mod, next_mod, pend_mod; struct robust_list __user *entry, *next_entry, *pending; - unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; - unsigned int next_pi; compat_uptr_t uentry, next_uentry, upending; compat_long_t futex_offset; int rc; @@ -1258,7 +1232,7 @@ static void compat_exit_robust_list(struct task_struct *curr) * Fetch the list head (which was registered earlier, via * sys_set_robust_list()): */ - if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi)) + if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &cur_mod)) return; /* * Fetch the relative futex offset: @@ -1269,8 +1243,7 @@ static void compat_exit_robust_list(struct task_struct *curr) * Fetch any possibly pending lock-add first, and handle it * if it exists: */ - if (compat_fetch_robust_entry(&upending, &pending, - &head->list_op_pending, &pip)) + if (compat_fetch_robust_entry(&upending, &pending, &head->list_op_pending, &pend_mod)) return; next_entry = NULL; /* avoid warning with gcc */ @@ -1280,7 +1253,7 @@ static void compat_exit_robust_list(struct task_struct *curr) * handle_futex_death: */ rc = compat_fetch_robust_entry(&next_uentry, &next_entry, - (compat_uptr_t __user *)&entry->next, &next_pi); + (compat_uptr_t __user *)&entry->next, &next_mod); /* * A pending lock might already be on the list, so * dont process it twice: @@ -1288,15 +1261,14 @@ static void compat_exit_robust_list(struct task_struct *curr) if (entry != pending) { void __user *uaddr = futex_uaddr(entry, futex_offset); - if (handle_futex_death(uaddr, curr, pi, - HANDLE_DEATH_LIST)) + if (handle_futex_death(uaddr, curr, cur_mod, HANDLE_DEATH_LIST)) return; } if (rc) return; uentry = next_uentry; entry = next_entry; - pi = next_pi; + cur_mod = next_mod; /* * Avoid excessively long or circular lists: */ @@ -1308,9 +1280,24 @@ static void compat_exit_robust_list(struct task_struct *curr) if (pending) { void __user *uaddr = futex_uaddr(pending, futex_offset); - handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING); + handle_futex_death(uaddr, curr, pend_mod, HANDLE_DEATH_PENDING); } } + +static bool compat_robust_list_clear_pending(u32 __user *pop) +{ + struct compat_robust_list_head __user *head = current->futex.compat_robust_list; + + if (!put_user(0U, pop)) + return true; + + /* See comment in robust_list_clear_pending(). */ + if (pop == &head->list_op_pending) + current->futex.compat_robust_list = NULL; + return false; +} +#else +static bool compat_robust_list_clear_pending(u32 __user *pop_addr) { return false; } #endif #ifdef CONFIG_FUTEX_PI @@ -1322,7 +1309,7 @@ static void compat_exit_robust_list(struct task_struct *curr) */ static void exit_pi_state_list(struct task_struct *curr) { - struct list_head *next, *head = &curr->pi_state_list; + struct list_head *next, *head = &curr->futex.pi_state_list; struct futex_pi_state *pi_state; union futex_key key = FUTEX_KEY_INIT; @@ -1336,7 +1323,7 @@ static void exit_pi_state_list(struct task_struct *curr) * on the mutex. */ WARN_ON(curr != current); - guard(private_hash)(); + guard(private_hash)(current->mm); /* * We are a ZOMBIE and nobody can enqueue itself on * pi_state_list anymore, but we have to be careful @@ -1348,7 +1335,8 @@ static void exit_pi_state_list(struct task_struct *curr) pi_state = list_entry(next, struct futex_pi_state, list); key = pi_state->key; if (1) { - CLASS(hb, hb)(&key); + CLASS(hbr, hbr)(&key); + auto hb = hbr.hb; /* * We can race against put_pi_state() removing itself from the @@ -1404,21 +1392,50 @@ static void exit_pi_state_list(struct task_struct *curr) static inline void exit_pi_state_list(struct task_struct *curr) { } #endif +bool futex_robust_list_clear_pending(void __user *pop, unsigned int flags) +{ + bool size32bit = !!(flags & FLAGS_ROBUST_LIST32); + + if (!IS_ENABLED(CONFIG_64BIT) && !size32bit) + return false; + + if (IS_ENABLED(CONFIG_64BIT) && size32bit) + return compat_robust_list_clear_pending(pop); + + return robust_list_clear_pending(pop); +} + +#ifdef CONFIG_FUTEX_ROBUST_UNLOCK +void __futex_fixup_robust_unlock(struct pt_regs *regs, struct futex_unlock_cs_range *csr) +{ + /* + * arch_futex_robust_unlock_get_pop() returns the list pending op pointer from + * @regs if the try_cmpxchg() succeeded. + */ + void __user *pop = arch_futex_robust_unlock_get_pop(regs); + + if (!pop) + return; + + futex_robust_list_clear_pending(pop, csr->pop_size32 ? FLAGS_ROBUST_LIST32 : 0); +} +#endif /* CONFIG_FUTEX_ROBUST_UNLOCK */ + static void futex_cleanup(struct task_struct *tsk) { - if (unlikely(tsk->robust_list)) { + if (unlikely(tsk->futex.robust_list)) { exit_robust_list(tsk); - tsk->robust_list = NULL; + tsk->futex.robust_list = NULL; } #ifdef CONFIG_COMPAT - if (unlikely(tsk->compat_robust_list)) { + if (unlikely(tsk->futex.compat_robust_list)) { compat_exit_robust_list(tsk); - tsk->compat_robust_list = NULL; + tsk->futex.compat_robust_list = NULL; } #endif - if (unlikely(!list_empty(&tsk->pi_state_list))) + if (unlikely(!list_empty(&tsk->futex.pi_state_list))) exit_pi_state_list(tsk); } @@ -1442,23 +1459,23 @@ static void futex_cleanup(struct task_struct *tsk) void futex_exit_recursive(struct task_struct *tsk) { /* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */ - if (tsk->futex_state == FUTEX_STATE_EXITING) { - __assume_ctx_lock(&tsk->futex_exit_mutex); - mutex_unlock(&tsk->futex_exit_mutex); + if (tsk->futex.state == FUTEX_STATE_EXITING) { + __assume_ctx_lock(&tsk->futex.exit_mutex); + mutex_unlock(&tsk->futex.exit_mutex); } - tsk->futex_state = FUTEX_STATE_DEAD; + tsk->futex.state = FUTEX_STATE_DEAD; } static void futex_cleanup_begin(struct task_struct *tsk) - __acquires(&tsk->futex_exit_mutex) + __acquires(&tsk->futex.exit_mutex) { /* * Prevent various race issues against a concurrent incoming waiter * including live locks by forcing the waiter to block on - * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in + * tsk->futex.exit_mutex when it observes FUTEX_STATE_EXITING in * attach_to_pi_owner(). */ - mutex_lock(&tsk->futex_exit_mutex); + mutex_lock(&tsk->futex.exit_mutex); /* * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock. @@ -1472,23 +1489,23 @@ static void futex_cleanup_begin(struct task_struct *tsk) * be observed in exit_pi_state_list(). */ raw_spin_lock_irq(&tsk->pi_lock); - tsk->futex_state = FUTEX_STATE_EXITING; + tsk->futex.state = FUTEX_STATE_EXITING; raw_spin_unlock_irq(&tsk->pi_lock); } static void futex_cleanup_end(struct task_struct *tsk, int state) - __releases(&tsk->futex_exit_mutex) + __releases(&tsk->futex.exit_mutex) { /* * Lockless store. The only side effect is that an observer might * take another loop until it becomes visible. */ - tsk->futex_state = state; + tsk->futex.state = state; /* * Drop the exit protection. This unblocks waiters which observed * FUTEX_STATE_EXITING to reevaluate the state. */ - mutex_unlock(&tsk->futex_exit_mutex); + mutex_unlock(&tsk->futex.exit_mutex); } void futex_exec_release(struct task_struct *tsk) @@ -1516,12 +1533,8 @@ void futex_exit_release(struct task_struct *tsk) futex_cleanup_end(tsk, FUTEX_STATE_DEAD); } -static void futex_hash_bucket_init(struct futex_hash_bucket *fhb, - struct futex_private_hash *fph) +static void futex_hash_bucket_init(struct futex_hash_bucket *fhb) { -#ifdef CONFIG_FUTEX_PRIVATE_HASH - fhb->priv = fph; -#endif atomic_set(&fhb->waiters, 0); plist_head_init(&fhb->chain); spin_lock_init(&fhb->lock); @@ -1553,17 +1566,17 @@ static void __futex_ref_atomic_begin(struct futex_private_hash *fph) * otherwise it would be impossible for it to have reported success * from futex_ref_is_dead(). */ - WARN_ON_ONCE(atomic_long_read(&mm->futex_atomic) != 0); + WARN_ON_ONCE(atomic_long_read(&mm->futex.phash.atomic) != 0); /* * Set the atomic to the bias value such that futex_ref_{get,put}() * will never observe 0. Will be fixed up in __futex_ref_atomic_end() * when folding in the percpu count. */ - atomic_long_set(&mm->futex_atomic, LONG_MAX); + atomic_long_set(&mm->futex.phash.atomic, LONG_MAX); smp_store_release(&fph->state, FR_ATOMIC); - call_rcu_hurry(&mm->futex_rcu, futex_ref_rcu); + call_rcu_hurry(&mm->futex.phash.rcu, futex_ref_rcu); } static void __futex_ref_atomic_end(struct futex_private_hash *fph) @@ -1584,7 +1597,7 @@ static void __futex_ref_atomic_end(struct futex_private_hash *fph) * Therefore the per-cpu counter is now stable, sum and reset. */ for_each_possible_cpu(cpu) { - unsigned int *ptr = per_cpu_ptr(mm->futex_ref, cpu); + unsigned int *ptr = per_cpu_ptr(mm->futex.phash.ref, cpu); count += *ptr; *ptr = 0; } @@ -1592,7 +1605,7 @@ static void __futex_ref_atomic_end(struct futex_private_hash *fph) /* * Re-init for the next cycle. */ - this_cpu_inc(*mm->futex_ref); /* 0 -> 1 */ + this_cpu_inc(*mm->futex.phash.ref); /* 0 -> 1 */ /* * Add actual count, subtract bias and initial refcount. @@ -1600,7 +1613,7 @@ static void __futex_ref_atomic_end(struct futex_private_hash *fph) * The moment this atomic operation happens, futex_ref_is_dead() can * become true. */ - ret = atomic_long_add_return(count - LONG_MAX - 1, &mm->futex_atomic); + ret = atomic_long_add_return(count - LONG_MAX - 1, &mm->futex.phash.atomic); if (!ret) wake_up_var(mm); @@ -1610,8 +1623,8 @@ static void __futex_ref_atomic_end(struct futex_private_hash *fph) static void futex_ref_rcu(struct rcu_head *head) { - struct mm_struct *mm = container_of(head, struct mm_struct, futex_rcu); - struct futex_private_hash *fph = rcu_dereference_raw(mm->futex_phash); + struct mm_struct *mm = container_of(head, struct mm_struct, futex.phash.rcu); + struct futex_private_hash *fph = rcu_dereference_raw(mm->futex.phash.hash); if (fph->state == FR_PERCPU) { /* @@ -1640,7 +1653,7 @@ static void futex_ref_drop(struct futex_private_hash *fph) /* * Can only transition the current fph; */ - WARN_ON_ONCE(rcu_dereference_raw(mm->futex_phash) != fph); + WARN_ON_ONCE(rcu_dereference_raw(mm->futex.phash.hash) != fph); /* * We enqueue at least one RCU callback. Ensure mm stays if the task * exits before the transition is completed. @@ -1651,9 +1664,9 @@ static void futex_ref_drop(struct futex_private_hash *fph) * In order to avoid the following scenario: * * futex_hash() __futex_pivot_hash() - * guard(rcu); guard(mm->futex_hash_lock); - * fph = mm->futex_phash; - * rcu_assign_pointer(&mm->futex_phash, new); + * guard(rcu); guard(mm->futex.phash.lock); + * fph = mm->futex.phash.hash; + * rcu_assign_pointer(&mm->futex.phash.hash, new); * futex_hash_allocate() * futex_ref_drop() * fph->state = FR_ATOMIC; @@ -1668,7 +1681,7 @@ static void futex_ref_drop(struct futex_private_hash *fph) * There must be at least one full grace-period between publishing a * new fph and trying to replace it. */ - if (poll_state_synchronize_rcu(mm->futex_batches)) { + if (poll_state_synchronize_rcu(mm->futex.phash.batches)) { /* * There was a grace-period, we can begin now. */ @@ -1676,7 +1689,7 @@ static void futex_ref_drop(struct futex_private_hash *fph) return; } - call_rcu_hurry(&mm->futex_rcu, futex_ref_rcu); + call_rcu_hurry(&mm->futex.phash.rcu, futex_ref_rcu); } static bool futex_ref_get(struct futex_private_hash *fph) @@ -1686,11 +1699,11 @@ static bool futex_ref_get(struct futex_private_hash *fph) guard(preempt)(); if (READ_ONCE(fph->state) == FR_PERCPU) { - __this_cpu_inc(*mm->futex_ref); + __this_cpu_inc(*mm->futex.phash.ref); return true; } - return atomic_long_inc_not_zero(&mm->futex_atomic); + return atomic_long_inc_not_zero(&mm->futex.phash.atomic); } static bool futex_ref_put(struct futex_private_hash *fph) @@ -1700,11 +1713,11 @@ static bool futex_ref_put(struct futex_private_hash *fph) guard(preempt)(); if (READ_ONCE(fph->state) == FR_PERCPU) { - __this_cpu_dec(*mm->futex_ref); + __this_cpu_dec(*mm->futex.phash.ref); return false; } - return atomic_long_dec_and_test(&mm->futex_atomic); + return atomic_long_dec_and_test(&mm->futex.phash.atomic); } static bool futex_ref_is_dead(struct futex_private_hash *fph) @@ -1716,28 +1729,23 @@ static bool futex_ref_is_dead(struct futex_private_hash *fph) if (smp_load_acquire(&fph->state) == FR_PERCPU) return false; - return atomic_long_read(&mm->futex_atomic) == 0; + return atomic_long_read(&mm->futex.phash.atomic) == 0; } -int futex_mm_init(struct mm_struct *mm) +static void futex_hash_init_mm(struct futex_mm_data *fd) { - mutex_init(&mm->futex_hash_lock); - RCU_INIT_POINTER(mm->futex_phash, NULL); - mm->futex_phash_new = NULL; - /* futex-ref */ - mm->futex_ref = NULL; - atomic_long_set(&mm->futex_atomic, 0); - mm->futex_batches = get_state_synchronize_rcu(); - return 0; + memset(&fd->phash, 0, sizeof(fd->phash)); + mutex_init(&fd->phash.lock); + fd->phash.batches = get_state_synchronize_rcu(); } void futex_hash_free(struct mm_struct *mm) { struct futex_private_hash *fph; - free_percpu(mm->futex_ref); - kvfree(mm->futex_phash_new); - fph = rcu_dereference_raw(mm->futex_phash); + free_percpu(mm->futex.phash.ref); + kvfree(mm->futex.phash.hash_new); + fph = rcu_dereference_raw(mm->futex.phash.hash); if (fph) kvfree(fph); } @@ -1748,10 +1756,10 @@ static bool futex_pivot_pending(struct mm_struct *mm) guard(rcu)(); - if (!mm->futex_phash_new) + if (!mm->futex.phash.hash_new) return true; - fph = rcu_dereference(mm->futex_phash); + fph = rcu_dereference(mm->futex.phash.hash); return futex_ref_is_dead(fph); } @@ -1793,7 +1801,7 @@ static int futex_hash_allocate(unsigned int hash_slots, unsigned int flags) * Once we've disabled the global hash there is no way back. */ scoped_guard(rcu) { - fph = rcu_dereference(mm->futex_phash); + fph = rcu_dereference(mm->futex.phash.hash); if (fph && !fph->hash_mask) { if (custom) return -EBUSY; @@ -1801,15 +1809,15 @@ static int futex_hash_allocate(unsigned int hash_slots, unsigned int flags) } } - if (!mm->futex_ref) { + if (!mm->futex.phash.ref) { /* * This will always be allocated by the first thread and * therefore requires no locking. */ - mm->futex_ref = alloc_percpu(unsigned int); - if (!mm->futex_ref) + mm->futex.phash.ref = alloc_percpu(unsigned int); + if (!mm->futex.phash.ref) return -ENOMEM; - this_cpu_inc(*mm->futex_ref); /* 0 -> 1 */ + this_cpu_inc(*mm->futex.phash.ref); /* 0 -> 1 */ } fph = kvzalloc(struct_size(fph, queues, hash_slots), @@ -1822,7 +1830,7 @@ static int futex_hash_allocate(unsigned int hash_slots, unsigned int flags) fph->mm = mm; for (i = 0; i < hash_slots; i++) - futex_hash_bucket_init(&fph->queues[i], fph); + futex_hash_bucket_init(&fph->queues[i]); if (custom) { /* @@ -1832,14 +1840,14 @@ again: wait_var_event(mm, futex_pivot_pending(mm)); } - scoped_guard(mutex, &mm->futex_hash_lock) { + scoped_guard(mutex, &mm->futex.phash.lock) { struct futex_private_hash *free __free(kvfree) = NULL; struct futex_private_hash *cur, *new; - cur = rcu_dereference_protected(mm->futex_phash, - lockdep_is_held(&mm->futex_hash_lock)); - new = mm->futex_phash_new; - mm->futex_phash_new = NULL; + cur = rcu_dereference_protected(mm->futex.phash.hash, + lockdep_is_held(&mm->futex.phash.lock)); + new = mm->futex.phash.hash_new; + mm->futex.phash.hash_new = NULL; if (fph) { if (cur && !cur->hash_mask) { @@ -1849,7 +1857,7 @@ again: * the second one returns here. */ free = fph; - mm->futex_phash_new = new; + mm->futex.phash.hash_new = new; return -EBUSY; } if (cur && !new) { @@ -1879,7 +1887,7 @@ again: if (new) { /* - * Will set mm->futex_phash_new on failure; + * Will set mm->futex.phash.new_hash on failure; * futex_private_hash_get() will try again. */ if (!__futex_pivot_hash(mm, new) && custom) @@ -1898,11 +1906,9 @@ int futex_hash_allocate_default(void) return 0; scoped_guard(rcu) { - threads = min_t(unsigned int, - get_nr_threads(current), - num_online_cpus()); + threads = min_t(unsigned int, get_nr_threads(current), num_online_cpus()); - fph = rcu_dereference(current->mm->futex_phash); + fph = rcu_dereference(current->mm->futex.phash.hash); if (fph) { if (fph->custom) return 0; @@ -1929,24 +1935,52 @@ static int futex_hash_get_slots(void) struct futex_private_hash *fph; guard(rcu)(); - fph = rcu_dereference(current->mm->futex_phash); + fph = rcu_dereference(current->mm->futex.phash.hash); if (fph && fph->hash_mask) return fph->hash_mask + 1; return 0; } +#else /* CONFIG_FUTEX_PRIVATE_HASH */ +static inline int futex_hash_allocate(unsigned int hslots, unsigned int flags) { return -EINVAL; } +static inline int futex_hash_get_slots(void) { return 0; } +static inline void futex_hash_init_mm(struct futex_mm_data *fd) { } +#endif /* !CONFIG_FUTEX_PRIVATE_HASH */ -#else +#ifdef CONFIG_FUTEX_ROBUST_UNLOCK +static void futex_invalidate_cs_ranges(struct futex_mm_data *fd) +{ + /* + * Invalidate start_ip so that the quick check fails for ip >= start_ip + * if VDSO is not mapped or the second slot is not available for compat + * tasks as they use VDSO32 which does not provide the 64-bit pointer + * variant. + */ + for (int i = 0; i < FUTEX_ROBUST_MAX_CS_RANGES; i++) + fd->unlock.cs_ranges[i].start_ip = ~0UL; +} -static int futex_hash_allocate(unsigned int hash_slots, unsigned int flags) +void futex_reset_cs_ranges(struct futex_mm_data *fd) { - return -EINVAL; + memset(fd->unlock.cs_ranges, 0, sizeof(fd->unlock.cs_ranges)); + futex_invalidate_cs_ranges(fd); } -static int futex_hash_get_slots(void) +static void futex_robust_unlock_init_mm(struct futex_mm_data *fd) { - return 0; + /* mm_dup() preserves the range, mm_alloc() clears it */ + if (!fd->unlock.cs_ranges[0].start_ip) + futex_invalidate_cs_ranges(fd); } +#else /* CONFIG_FUTEX_ROBUST_UNLOCK */ +static inline void futex_robust_unlock_init_mm(struct futex_mm_data *fd) { } +#endif /* !CONFIG_FUTEX_ROBUST_UNLOCK */ +#if defined(CONFIG_FUTEX_PRIVATE_HASH) || defined(CONFIG_FUTEX_ROBUST_UNLOCK) +void futex_mm_init(struct mm_struct *mm) +{ + futex_hash_init_mm(&mm->futex); + futex_robust_unlock_init_mm(&mm->futex); +} #endif int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4) @@ -2001,7 +2035,7 @@ static int __init futex_init(void) BUG_ON(!table); for (i = 0; i < hashsize; i++) - futex_hash_bucket_init(&table[i], NULL); + futex_hash_bucket_init(&table[i]); futex_queues[n] = table; } diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index 9f6bf6f585fc..f00f0863ed44 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -40,6 +40,8 @@ #define FLAGS_NUMA 0x0080 #define FLAGS_STRICT 0x0100 #define FLAGS_MPOL 0x0200 +#define FLAGS_ROBUST_UNLOCK 0x0400 +#define FLAGS_ROBUST_LIST32 0x0800 /* FUTEX_ to FLAGS_ */ static inline unsigned int futex_to_flags(unsigned int op) @@ -52,6 +54,12 @@ static inline unsigned int futex_to_flags(unsigned int op) if (op & FUTEX_CLOCK_REALTIME) flags |= FLAGS_CLOCKRT; + if (op & FUTEX_ROBUST_UNLOCK) + flags |= FLAGS_ROBUST_UNLOCK; + + if (op & FUTEX_ROBUST_LIST32) + flags |= FLAGS_ROBUST_LIST32; + return flags; } @@ -126,6 +134,15 @@ static inline bool should_fail_futex(bool fshared) } #endif +static inline bool futex_key_is_private(union futex_key *key) +{ + /* + * Relies on get_futex_key() to set either bit for shared + * futexes -- see comment with union futex_key. + */ + return !(key->both.offset & (FUT_OFF_INODE | FUT_OFF_MMSHARED)); +} + /* * Hash buckets are shared by all the futex_keys that hash to the same * location. Each key may have multiple futex_q structures, one for each task @@ -135,7 +152,6 @@ struct futex_hash_bucket { atomic_t waiters; spinlock_t lock; struct plist_head chain; - struct futex_private_hash *priv; } ____cacheline_aligned_in_smp; /* @@ -175,7 +191,7 @@ typedef void (futex_wake_fn)(struct wake_q_head *wake_q, struct futex_q *q); * @requeue_pi_key: the requeue_pi target futex key * @bitset: bitset for the optional bitmasked wakeup * @requeue_state: State field for futex_requeue_pi() - * @drop_hb_ref: Waiter should drop the extra hash bucket reference if true + * @drop_fph: Waiter should drop the extra private hash reference when set * @requeue_wait: RCU wait for futex_requeue_pi() (RT only) * * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so @@ -202,7 +218,7 @@ struct futex_q { union futex_key *requeue_pi_key; u32 bitset; atomic_t requeue_state; - bool drop_hb_ref; + struct futex_private_hash *drop_fph; #ifdef CONFIG_PREEMPT_RT struct rcuwait requeue_wait; #endif @@ -222,28 +238,29 @@ extern struct hrtimer_sleeper * futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout, int flags, u64 range_ns); -extern struct futex_hash_bucket *futex_hash(union futex_key *key); -#ifdef CONFIG_FUTEX_PRIVATE_HASH -extern void futex_hash_get(struct futex_hash_bucket *hb); -extern void futex_hash_put(struct futex_hash_bucket *hb); +struct futex_bucket_ref { + struct futex_hash_bucket *hb; + struct futex_private_hash *fph; +}; -extern struct futex_private_hash *futex_private_hash(void); +#ifdef CONFIG_FUTEX_PRIVATE_HASH +extern struct futex_private_hash *futex_private_hash(struct mm_struct *mm); extern void futex_private_hash_put(struct futex_private_hash *fph); #else /* !CONFIG_FUTEX_PRIVATE_HASH */ -static inline void futex_hash_get(struct futex_hash_bucket *hb) { } -static inline void futex_hash_put(struct futex_hash_bucket *hb) { } -static inline struct futex_private_hash *futex_private_hash(void) { return NULL; } +static inline struct futex_private_hash *futex_private_hash(struct mm_struct *mm) { return NULL; } static inline void futex_private_hash_put(struct futex_private_hash *fph) { } #endif -DEFINE_CLASS(hb, struct futex_hash_bucket *, - if (_T) futex_hash_put(_T), +extern struct futex_bucket_ref futex_hash(union futex_key *key); + +DEFINE_CLASS(hbr, struct futex_bucket_ref, + if (_T.fph) futex_private_hash_put(_T.fph), futex_hash(key), union futex_key *key); DEFINE_CLASS(private_hash, struct futex_private_hash *, if (_T) futex_private_hash_put(_T), - futex_private_hash(), void); + futex_private_hash(mm), struct mm_struct *mm); /** * futex_match - Check whether two futex keys are equal @@ -449,13 +466,16 @@ extern int futex_unqueue_multiple(struct futex_vector *v, int count); extern int futex_wait_multiple(struct futex_vector *vs, unsigned int count, struct hrtimer_sleeper *to); -extern int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset); +extern int futex_wake(u32 __user *uaddr, unsigned int flags, void __user *pop, + int nr_wake, u32 bitset); extern int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, int nr_wake, int nr_wake2, int op); -extern int futex_unlock_pi(u32 __user *uaddr, unsigned int flags); +extern int futex_unlock_pi(u32 __user *uaddr, unsigned int flags, void __user *pop); extern int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock); +bool futex_robust_list_clear_pending(void __user *pop, unsigned int flags); + #endif /* _FUTEX_H */ diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c index 643199fdbe62..795011ea1202 100644 --- a/kernel/futex/pi.c +++ b/kernel/futex/pi.c @@ -14,7 +14,7 @@ int refill_pi_state_cache(void) { struct futex_pi_state *pi_state; - if (likely(current->pi_state_cache)) + if (likely(current->futex.pi_state_cache)) return 0; pi_state = kzalloc_obj(*pi_state); @@ -28,17 +28,17 @@ int refill_pi_state_cache(void) refcount_set(&pi_state->refcount, 1); pi_state->key = FUTEX_KEY_INIT; - current->pi_state_cache = pi_state; + current->futex.pi_state_cache = pi_state; return 0; } static struct futex_pi_state *alloc_pi_state(void) { - struct futex_pi_state *pi_state = current->pi_state_cache; + struct futex_pi_state *pi_state = current->futex.pi_state_cache; WARN_ON(!pi_state); - current->pi_state_cache = NULL; + current->futex.pi_state_cache = NULL; return pi_state; } @@ -60,7 +60,7 @@ static void pi_state_update_owner(struct futex_pi_state *pi_state, if (new_owner) { raw_spin_lock(&new_owner->pi_lock); WARN_ON(!list_empty(&pi_state->list)); - list_add(&pi_state->list, &new_owner->pi_state_list); + list_add(&pi_state->list, &new_owner->futex.pi_state_list); pi_state->owner = new_owner; raw_spin_unlock(&new_owner->pi_lock); } @@ -96,7 +96,7 @@ void put_pi_state(struct futex_pi_state *pi_state) raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags); } - if (current->pi_state_cache) { + if (current->futex.pi_state_cache) { kfree(pi_state); } else { /* @@ -106,7 +106,7 @@ void put_pi_state(struct futex_pi_state *pi_state) */ pi_state->owner = NULL; refcount_set(&pi_state->refcount, 1); - current->pi_state_cache = pi_state; + current->futex.pi_state_cache = pi_state; } } @@ -179,7 +179,7 @@ void put_pi_state(struct futex_pi_state *pi_state) * * p->pi_lock: * - * p->pi_state_list -> pi_state->list, relation + * p->futex.pi_state_list -> pi_state->list, relation * pi_mutex->owner -> pi_state->owner, relation * * pi_state->refcount: @@ -327,7 +327,7 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval, * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the * caller that the alleged owner is busy. */ - if (tsk && tsk->futex_state != FUTEX_STATE_DEAD) + if (tsk && tsk->futex.state != FUTEX_STATE_DEAD) return -EBUSY; /* @@ -346,8 +346,8 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval, * *uaddr = 0xC0000000; tsk = get_task(PID); * } if (!tsk->flags & PF_EXITING) { * ... attach(); - * tsk->futex_state = } else { - * FUTEX_STATE_DEAD; if (tsk->futex_state != + * tsk->futex.state = } else { + * FUTEX_STATE_DEAD; if (tsk->futex.state != * FUTEX_STATE_DEAD) * return -EAGAIN; * return -ESRCH; <--- FAIL @@ -396,7 +396,7 @@ static void __attach_to_pi_owner(struct task_struct *p, union futex_key *key, pi_state->key = *key; WARN_ON(!list_empty(&pi_state->list)); - list_add(&pi_state->list, &p->pi_state_list); + list_add(&pi_state->list, &p->futex.pi_state_list); /* * Assignment without holding pi_state->pi_mutex.wait_lock is safe * because there is no concurrency as the object is not published yet. @@ -440,7 +440,7 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key, * in futex_exit_release(), we do this protected by p->pi_lock: */ raw_spin_lock_irq(&p->pi_lock); - if (unlikely(p->futex_state != FUTEX_STATE_OK)) { + if (unlikely(p->futex.state != FUTEX_STATE_OK)) { /* * The task is on the way out. When the futex state is * FUTEX_STATE_DEAD, we know that the task has finished @@ -945,7 +945,8 @@ retry: retry_private: if (1) { - CLASS(hb, hb)(&q.key); + CLASS(hbr, hbr)(&q.key); + auto hb = hbr.hb; futex_q_lock(&q, hb); @@ -1009,7 +1010,7 @@ retry_private: * the thread, performing resize, will block on hb->lock during * the requeue. */ - futex_hash_put(no_free_ptr(hb)); + futex_private_hash_put(no_free_ptr(hbr.fph)); /* * Must be done before we enqueue the waiter, here is unfortunately * under the hb lock, but that *should* work because it does nothing. @@ -1100,11 +1101,9 @@ no_block: __release(&hb->lock); futex_unqueue_pi(&q); spin_unlock(q.lock_ptr); - if (q.drop_hb_ref) { - CLASS(hb, hb)(&q.key); - /* Additional reference from futex_unlock_pi() */ - futex_hash_put(hb); - } + + /* Additional reference from futex_unlock_pi() */ + futex_private_hash_put(q.drop_fph); goto out; out_unlock_put_key: @@ -1139,7 +1138,7 @@ out: * This is the in-kernel slowpath: we look up the PI state (if any), * and do the rt-mutex unlock. */ -int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) +static int __futex_unlock_pi(u32 __user *uaddr, unsigned int flags) { u32 curval, uval, vpid = task_pid_vnr(current); union futex_key key = FUTEX_KEY_INIT; @@ -1148,7 +1147,6 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) if (!IS_ENABLED(CONFIG_FUTEX_PI)) return -ENOSYS; - retry: if (get_user(uval, uaddr)) return -EFAULT; @@ -1162,7 +1160,8 @@ retry: if (ret) return ret; - CLASS(hb, hb)(&key); + CLASS(hbr, hbr)(&key); + auto hb = hbr.hb; spin_lock(&hb->lock); retry_hb: @@ -1219,8 +1218,9 @@ retry_hb: * Acquire a reference for the leaving waiter to ensure * valid futex_q::lock_ptr. */ - futex_hash_get(hb); - top_waiter->drop_hb_ref = true; + if (futex_key_is_private(&key)) + top_waiter->drop_fph = futex_private_hash(key.private.mm); + __futex_unqueue(top_waiter); raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); goto retry_hb; @@ -1302,3 +1302,15 @@ pi_faulted: return ret; } +int futex_unlock_pi(u32 __user *uaddr, unsigned int flags, void __user *pop) +{ + int ret = __futex_unlock_pi(uaddr, flags); + + if (ret || !(flags & FLAGS_ROBUST_UNLOCK)) + return ret; + + if (!futex_robust_list_clear_pending(pop, flags)) + return -EFAULT; + + return 0; +} diff --git a/kernel/futex/requeue.c b/kernel/futex/requeue.c index 1d99a84dc9ad..7384672916fb 100644 --- a/kernel/futex/requeue.c +++ b/kernel/futex/requeue.c @@ -241,8 +241,8 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, * Acquire a reference for the waiter to ensure valid * futex_q::lock_ptr. */ - futex_hash_get(hb); - q->drop_hb_ref = true; + if (futex_key_is_private(key)) + q->drop_fph = futex_private_hash(key->private.mm); q->lock_ptr = &hb->lock; task = READ_ONCE(q->task); @@ -459,8 +459,10 @@ retry: retry_private: if (1) { - CLASS(hb, hb1)(&key1); - CLASS(hb, hb2)(&key2); + CLASS(hbr, hbr1)(&key1); + CLASS(hbr, hbr2)(&key2); + auto hb1 = hbr1.hb; + auto hb2 = hbr2.hb; futex_hb_waiters_inc(hb2); double_lock_hb(hb1, hb2); @@ -838,7 +840,8 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, switch (futex_requeue_pi_wakeup_sync(&q)) { case Q_REQUEUE_PI_IGNORE: { - CLASS(hb, hb)(&q.key); + CLASS(hbr, hbr)(&q.key); + auto hb = hbr.hb; /* The waiter is still on uaddr1 */ spin_lock(&hb->lock); ret = handle_early_requeue_pi_wakeup(hb, &q, to); @@ -908,11 +911,8 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, default: BUG(); } - if (q.drop_hb_ref) { - CLASS(hb, hb)(&q.key); - /* Additional reference from requeue_pi_wake_futex() */ - futex_hash_put(hb); - } + /* Additional reference from requeue_pi_wake_futex() */ + futex_private_hash_put(q.drop_fph); out: if (to) { diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 77ad9691f6a6..2fa19d9d008d 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -25,17 +25,13 @@ * @head: pointer to the list-head * @len: length of the list-head, as userspace expects */ -SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, - size_t, len) +SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, size_t, len) { - /* - * The kernel knows only one size for now: - */ + /* The kernel knows only one size for now. */ if (unlikely(len != sizeof(*head))) return -EINVAL; - current->robust_list = head; - + current->futex.robust_list = head; return 0; } @@ -43,9 +39,9 @@ static inline void __user *futex_task_robust_list(struct task_struct *p, bool co { #ifdef CONFIG_COMPAT if (compat) - return p->compat_robust_list; + return p->futex.compat_robust_list; #endif - return p->robust_list; + return p->futex.robust_list; } static void __user *futex_get_robust_list_common(int pid, bool compat) @@ -122,6 +118,13 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, return -ENOSYS; } + if (flags & FLAGS_ROBUST_UNLOCK) { + if (cmd != FUTEX_WAKE && + cmd != FUTEX_WAKE_BITSET && + cmd != FUTEX_UNLOCK_PI) + return -ENOSYS; + } + switch (cmd) { case FUTEX_WAIT: val3 = FUTEX_BITSET_MATCH_ANY; @@ -132,7 +135,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, val3 = FUTEX_BITSET_MATCH_ANY; fallthrough; case FUTEX_WAKE_BITSET: - return futex_wake(uaddr, flags, val, val3); + return futex_wake(uaddr, flags, uaddr2, val, val3); case FUTEX_REQUEUE: return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, NULL, 0); case FUTEX_CMP_REQUEUE: @@ -145,7 +148,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, case FUTEX_LOCK_PI2: return futex_lock_pi(uaddr, flags, timeout, 0); case FUTEX_UNLOCK_PI: - return futex_unlock_pi(uaddr, flags); + return futex_unlock_pi(uaddr, flags, uaddr2); case FUTEX_TRYLOCK_PI: return futex_lock_pi(uaddr, flags, NULL, 1); case FUTEX_WAIT_REQUEUE_PI: @@ -379,7 +382,7 @@ SYSCALL_DEFINE4(futex_wake, if (!futex_validate_input(flags, mask)) return -EINVAL; - return futex_wake(uaddr, FLAGS_STRICT | flags, nr, mask); + return futex_wake(uaddr, FLAGS_STRICT | flags, NULL, nr, mask); } /* @@ -475,15 +478,13 @@ SYSCALL_DEFINE4(futex_requeue, } #ifdef CONFIG_COMPAT -COMPAT_SYSCALL_DEFINE2(set_robust_list, - struct compat_robust_list_head __user *, head, - compat_size_t, len) +COMPAT_SYSCALL_DEFINE2(set_robust_list, struct compat_robust_list_head __user *, head, + compat_size_t, len) { if (unlikely(len != sizeof(*head))) return -EINVAL; - current->compat_robust_list = head; - + current->futex.compat_robust_list = head; return 0; } @@ -523,4 +524,3 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3); } #endif /* CONFIG_COMPAT_32BIT_TIME */ - diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c index ceed9d879059..d4483d15d30a 100644 --- a/kernel/futex/waitwake.c +++ b/kernel/futex/waitwake.c @@ -150,12 +150,35 @@ void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q) } /* + * If requested, clear the robust list pending op and unlock the futex + */ +static bool futex_robust_unlock(u32 __user *uaddr, unsigned int flags, void __user *pop) +{ + if (!(flags & FLAGS_ROBUST_UNLOCK)) + return true; + + /* First unlock the futex, which requires release semantics. */ + scoped_user_write_access(uaddr, efault) + unsafe_atomic_store_release_user(0, uaddr, efault); + + /* + * Clear the pending list op now. If that fails, then the task is in + * deeper trouble as the robust list head is usually part of the TLS. + * The chance of survival is close to zero. + */ + return futex_robust_list_clear_pending(pop, flags); + +efault: + return false; +} + +/* * Wake up waiters matching bitset queued on this futex (uaddr). */ -int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) +int futex_wake(u32 __user *uaddr, unsigned int flags, void __user *pop, int nr_wake, u32 bitset) { - struct futex_q *this, *next; union futex_key key = FUTEX_KEY_INIT; + struct futex_q *this, *next; DEFINE_WAKE_Q(wake_q); int ret; @@ -166,10 +189,14 @@ int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) if (unlikely(ret != 0)) return ret; + if (!futex_robust_unlock(uaddr, flags, pop)) + return -EFAULT; + if ((flags & FLAGS_STRICT) && !nr_wake) return 0; - CLASS(hb, hb)(&key); + CLASS(hbr, hbr)(&key); + auto hb = hbr.hb; /* Make sure we really have tasks to wakeup */ if (!futex_hb_waiters_pending(hb)) @@ -266,8 +293,10 @@ retry: retry_private: if (1) { - CLASS(hb, hb1)(&key1); - CLASS(hb, hb2)(&key2); + CLASS(hbr, hbr1)(&key1); + CLASS(hbr, hbr2)(&key2); + auto hb1 = hbr1.hb; + auto hb2 = hbr2.hb; double_lock_hb(hb1, hb2); op_ret = futex_atomic_op_inuser(op, uaddr2); @@ -409,7 +438,7 @@ int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *woken) * Make sure to have a reference on the private_hash such that we * don't block on rehash after changing the task state below. */ - guard(private_hash)(); + guard(private_hash)(current->mm); /* * Enqueuing multiple futexes is tricky, because we need to enqueue @@ -446,7 +475,8 @@ retry: u32 val = vs[i].w.val; if (1) { - CLASS(hb, hb)(&q->key); + CLASS(hbr, hbr)(&q->key); + auto hb = hbr.hb; futex_q_lock(q, hb); ret = futex_get_value_locked(&uval, uaddr); @@ -621,7 +651,8 @@ retry: retry_private: if (1) { - CLASS(hb, hb)(&q->key); + CLASS(hbr, hbr)(&q->key); + auto hb = hbr.hb; futex_q_lock(q, hb); |
