summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastian Andrzej Siewior <bigeasy@linutronix.de>2022-02-17 11:24:04 +0100
committerThomas Gleixner <tglx@linutronix.de>2022-02-22 22:25:02 +0100
commit1a03d3f13ffe5dd24142d6db629e72c11b704d99 (patch)
tree84657d0e0eb3fb7653a15caf35822407dddb0274
parentf1c1a9ee00e4c53c9ccc03ec1aff4792948a25eb (diff)
downloadlwn-1a03d3f13ffe5dd24142d6db629e72c11b704d99.tar.gz
lwn-1a03d3f13ffe5dd24142d6db629e72c11b704d99.zip
fork: Move task stack accounting to do_exit()
There is no need to perform the stack accounting of the outgoing task in its final schedule() invocation which happens with preemption disabled. The task is leaving, the resources will be freed and the accounting can happen in do_exit() before the actual schedule invocation which frees the stack memory. Move the accounting of the stack memory from release_task_stack() to exit_task_stack_account() which then can be invoked from do_exit(). Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Andy Lutomirski <luto@kernel.org> Link: https://lore.kernel.org/r/20220217102406.3697941-7-bigeasy@linutronix.de
-rw-r--r--include/linux/sched/task_stack.h2
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/fork.c35
3 files changed, 26 insertions, 12 deletions
diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
index d10150587d81..892562ebbd3a 100644
--- a/include/linux/sched/task_stack.h
+++ b/include/linux/sched/task_stack.h
@@ -79,6 +79,8 @@ static inline void *try_get_task_stack(struct task_struct *tsk)
static inline void put_task_stack(struct task_struct *tsk) {}
#endif
+void exit_task_stack_account(struct task_struct *tsk);
+
#define task_stack_end_corrupted(task) \
(*(end_of_stack(task)) != STACK_END_MAGIC)
diff --git a/kernel/exit.c b/kernel/exit.c
index b00a25bb4ab9..c303cffe7fdb 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -845,6 +845,7 @@ void __noreturn do_exit(long code)
put_page(tsk->task_frag.page);
validate_creds_for_do_exit(tsk);
+ exit_task_stack_account(tsk);
check_stack_usage();
preempt_disable();
diff --git a/kernel/fork.c b/kernel/fork.c
index ac63e7fa8816..25828127db8d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -211,9 +211,8 @@ static int free_vm_stack_cache(unsigned int cpu)
return 0;
}
-static int memcg_charge_kernel_stack(struct task_struct *tsk)
+static int memcg_charge_kernel_stack(struct vm_struct *vm)
{
- struct vm_struct *vm = task_stack_vm_area(tsk);
int i;
int ret;
@@ -239,6 +238,7 @@ err:
static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
+ struct vm_struct *vm;
void *stack;
int i;
@@ -256,7 +256,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
/* Clear stale pointers from reused stack. */
memset(s->addr, 0, THREAD_SIZE);
- if (memcg_charge_kernel_stack(tsk)) {
+ if (memcg_charge_kernel_stack(s)) {
vfree(s->addr);
return -ENOMEM;
}
@@ -279,7 +279,8 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
if (!stack)
return -ENOMEM;
- if (memcg_charge_kernel_stack(tsk)) {
+ vm = find_vm_area(stack);
+ if (memcg_charge_kernel_stack(vm)) {
vfree(stack);
return -ENOMEM;
}
@@ -288,19 +289,15 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
* free_thread_stack() can be called in interrupt context,
* so cache the vm_struct.
*/
- tsk->stack_vm_area = find_vm_area(stack);
+ tsk->stack_vm_area = vm;
tsk->stack = stack;
return 0;
}
static void free_thread_stack(struct task_struct *tsk)
{
- struct vm_struct *vm = task_stack_vm_area(tsk);
int i;
- for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
- memcg_kmem_uncharge_page(vm->pages[i], 0);
-
for (i = 0; i < NR_CACHED_STACKS; i++) {
if (this_cpu_cmpxchg(cached_stacks[i], NULL,
tsk->stack_vm_area) != NULL)
@@ -454,12 +451,25 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
}
}
+void exit_task_stack_account(struct task_struct *tsk)
+{
+ account_kernel_stack(tsk, -1);
+
+ if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+ struct vm_struct *vm;
+ int i;
+
+ vm = task_stack_vm_area(tsk);
+ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
+ memcg_kmem_uncharge_page(vm->pages[i], 0);
+ }
+}
+
static void release_task_stack(struct task_struct *tsk)
{
if (WARN_ON(READ_ONCE(tsk->__state) != TASK_DEAD))
return; /* Better to leak the stack than to free prematurely */
- account_kernel_stack(tsk, -1);
free_thread_stack(tsk);
}
@@ -918,6 +928,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
#ifdef CONFIG_THREAD_INFO_IN_TASK
refcount_set(&tsk->stack_refcount, 1);
#endif
+ account_kernel_stack(tsk, 1);
err = scs_prepare(tsk, node);
if (err)
@@ -961,8 +972,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
tsk->wake_q.next = NULL;
tsk->worker_private = NULL;
- account_kernel_stack(tsk, 1);
-
kcov_task_init(tsk);
kmap_local_fork(tsk);
@@ -981,6 +990,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
return tsk;
free_stack:
+ exit_task_stack_account(tsk);
free_thread_stack(tsk);
free_tsk:
free_task_struct(tsk);
@@ -2459,6 +2469,7 @@ bad_fork_cleanup_count:
exit_creds(p);
bad_fork_free:
WRITE_ONCE(p->__state, TASK_DEAD);
+ exit_task_stack_account(p);
put_task_stack(p);
delayed_free_task(p);
fork_out: