From 0771dfefc9e538f077d0b43b6dec19a5a67d0e70 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Mar 2006 01:16:22 -0800 Subject: [PATCH] lightweight robust futexes: core Add the core infrastructure for robust futexes: structure definitions, the new syscalls and the do_exit() based cleanup mechanism. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Arjan van de Ven Acked-by: Ulrich Drepper Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/futex.h | 95 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 3 ++ include/linux/threads.h | 3 +- 3 files changed, 100 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/futex.h b/include/linux/futex.h index 10f96c31971e..20face6b798d 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -1,6 +1,8 @@ #ifndef _LINUX_FUTEX_H #define _LINUX_FUTEX_H +#include + /* Second argument to futex syscall */ @@ -11,10 +13,103 @@ #define FUTEX_CMP_REQUEUE 4 #define FUTEX_WAKE_OP 5 +/* + * Support for robust futexes: the kernel cleans up held futexes at + * thread exit time. + */ + +/* + * Per-lock list entry - embedded in user-space locks, somewhere close + * to the futex field. (Note: user-space uses a double-linked list to + * achieve O(1) list add and remove, but the kernel only needs to know + * about the forward link) + * + * NOTE: this structure is part of the syscall ABI, and must not be + * changed. + */ +struct robust_list { + struct robust_list __user *next; +}; + +/* + * Per-thread list head: + * + * NOTE: this structure is part of the syscall ABI, and must only be + * changed if the change is first communicated with the glibc folks. + * (When an incompatible change is done, we'll increase the structure + * size, which glibc will detect) + */ +struct robust_list_head { + /* + * The head of the list. Points back to itself if empty: + */ + struct robust_list list; + + /* + * This relative offset is set by user-space, it gives the kernel + * the relative position of the futex field to examine. This way + * we keep userspace flexible, to freely shape its data-structure, + * without hardcoding any particular offset into the kernel: + */ + long futex_offset; + + /* + * The death of the thread may race with userspace setting + * up a lock's links. So to handle this race, userspace first + * sets this field to the address of the to-be-taken lock, + * then does the lock acquire, and then adds itself to the + * list, and then clears this field. Hence the kernel will + * always have full knowledge of all locks that the thread + * _might_ have taken. We check the owner TID in any case, + * so only truly owned locks will be handled. + */ + struct robust_list __user *list_op_pending; +}; + +/* + * Are there any waiters for this robust futex: + */ +#define FUTEX_WAITERS 0x80000000 + +/* + * The kernel signals via this bit that a thread holding a futex + * has exited without unlocking the futex. The kernel also does + * a FUTEX_WAKE on such futexes, after setting the bit, to wake + * up any possible waiters: + */ +#define FUTEX_OWNER_DIED 0x40000000 + +/* + * Reserved bit: + */ +#define FUTEX_OWNER_PENDING 0x20000000 + +/* + * The rest of the robust-futex field is for the TID: + */ +#define FUTEX_TID_MASK 0x1fffffff + +/* + * A limit of one million locks held per thread (!) ought to be enough + * for some time. This also protects against a deliberately circular + * list. Not worth introducing an rlimit for this: + */ +#define ROBUST_LIST_LIMIT 1048576 + long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout, unsigned long uaddr2, int val2, int val3); +extern int handle_futex_death(unsigned int *uaddr, struct task_struct *curr); + +#ifdef CONFIG_FUTEX +extern void exit_robust_list(struct task_struct *curr); +#else +static inline void exit_robust_list(struct task_struct *curr) +{ +} +#endif + #define FUTEX_OP_SET 0 /* *(int *)UADDR2 = OPARG; */ #define FUTEX_OP_ADD 1 /* *(int *)UADDR2 += OPARG; */ #define FUTEX_OP_OR 2 /* *(int *)UADDR2 |= OPARG; */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 036d14d2bf90..fd4848f2d750 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -35,6 +35,7 @@ #include #include #include +#include #include /* For AT_VECTOR_SIZE */ @@ -872,6 +873,8 @@ struct task_struct { int cpuset_mems_generation; int cpuset_mem_spread_rotor; #endif + struct robust_list_head __user *robust_list; + atomic_t fs_excl; /* holding fs exclusive resources */ struct rcu_head rcu; }; diff --git a/include/linux/threads.h b/include/linux/threads.h index b59738ac6197..e646bcdf2614 100644 --- a/include/linux/threads.h +++ b/include/linux/threads.h @@ -28,7 +28,8 @@ #define PID_MAX_DEFAULT (CONFIG_BASE_SMALL ? 0x1000 : 0x8000) /* - * A maximum of 4 million PIDs should be enough for a while: + * A maximum of 4 million PIDs should be enough for a while. + * [NOTE: PID/TIDs are limited to 2^29 ~= 500+ million, see futex.h.] */ #define PID_MAX_LIMIT (CONFIG_BASE_SMALL ? PAGE_SIZE * 8 : \ (sizeof(long) > 4 ? 4 * 1024 * 1024 : PID_MAX_DEFAULT)) -- cgit v1.2.3