diff options
author | Shailabh Nagar <nagar@watson.ibm.com> | 2006-07-14 00:24:44 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-07-14 21:53:57 -0700 |
commit | ad4ecbcba72855a2b5319b96e2a3a65ed1ca3bfd (patch) | |
tree | a2f5b98598948525de77ab594e4432f09a230388 /kernel | |
parent | 25890454667b3295f67b3372352be90705f8667c (diff) | |
download | lwn-ad4ecbcba72855a2b5319b96e2a3a65ed1ca3bfd.tar.gz lwn-ad4ecbcba72855a2b5319b96e2a3a65ed1ca3bfd.zip |
[PATCH] delay accounting taskstats interface send tgid once
Send per-tgid data only once during exit of a thread group instead of once
with each member thread exit.
Currently, when a thread exits, besides its per-tid data, the per-tgid data
of its thread group is also sent out, if its thread group is non-empty.
The per-tgid data sent consists of the sum of per-tid stats for all
*remaining* threads of the thread group.
This patch modifies this sending in two ways:
- the per-tgid data is sent only when the last thread of a thread group
exits. This cuts down heavily on the overhead of sending/receiving
per-tgid data, especially when other exploiters of the taskstats
interface aren't interested in per-tgid stats
- the semantics of the per-tgid data sent are changed. Instead of being
the sum of per-tid data for remaining threads, the value now sent is the
true total accumulated statistics for all threads that are/were part of
the thread group.
The patch also addresses a minor issue where failure of one accounting
subsystem to fill in the taskstats structure was causing the taskstats
data to not be sent at all.
The patch has been tested for stability by running cerberus for over 4 hours
on an SMP system.
[akpm@osdl.org: bugfixes]
Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com>
Signed-off-by: Balbir Singh <balbir@in.ibm.com>
Cc: Jay Lan <jlan@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/exit.c | 8 | ||||
-rw-r--r-- | kernel/fork.c | 4 | ||||
-rw-r--r-- | kernel/taskstats.c | 98 |
3 files changed, 74 insertions, 36 deletions
diff --git a/kernel/exit.c b/kernel/exit.c index 9852ed8c2988..67c1e9a4f812 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -845,7 +845,7 @@ static void exit_notify(struct task_struct *tsk) fastcall NORET_TYPE void do_exit(long code) { struct task_struct *tsk = current; - struct taskstats *tidstats, *tgidstats; + struct taskstats *tidstats; int group_dead; profile_task_exit(tsk); @@ -884,7 +884,7 @@ fastcall NORET_TYPE void do_exit(long code) current->comm, current->pid, preempt_count()); - taskstats_exit_alloc(&tidstats, &tgidstats); + taskstats_exit_alloc(&tidstats); acct_update_integrals(tsk); if (tsk->mm) { @@ -905,8 +905,8 @@ fastcall NORET_TYPE void do_exit(long code) #endif if (unlikely(tsk->audit_context)) audit_free(tsk); - taskstats_exit_send(tsk, tidstats, tgidstats); - taskstats_exit_free(tidstats, tgidstats); + taskstats_exit_send(tsk, tidstats, group_dead); + taskstats_exit_free(tidstats); delayacct_tsk_exit(tsk); exit_mm(tsk); diff --git a/kernel/fork.c b/kernel/fork.c index 451cfd35bf22..1b0f7b1e0881 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -44,6 +44,7 @@ #include <linux/acct.h> #include <linux/cn_proc.h> #include <linux/delayacct.h> +#include <linux/taskstats_kern.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -819,6 +820,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts if (clone_flags & CLONE_THREAD) { atomic_inc(&current->signal->count); atomic_inc(&current->signal->live); + taskstats_tgid_alloc(current->signal); return 0; } sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); @@ -863,6 +865,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts INIT_LIST_HEAD(&sig->cpu_timers[0]); INIT_LIST_HEAD(&sig->cpu_timers[1]); INIT_LIST_HEAD(&sig->cpu_timers[2]); + taskstats_tgid_init(sig); task_lock(current->group_leader); memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); @@ -884,6 +887,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts 
void __cleanup_signal(struct signal_struct *sig) { exit_thread_group_keys(sig); + taskstats_tgid_free(sig); kmem_cache_free(signal_cachep, sig); } diff --git a/kernel/taskstats.c b/kernel/taskstats.c index ea9506de3b85..4a0a5022b299 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -132,46 +132,79 @@ static int fill_pid(pid_t pid, struct task_struct *pidtsk, static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk, struct taskstats *stats) { - int rc; struct task_struct *tsk, *first; + unsigned long flags; + /* + * Add additional stats from live tasks except zombie thread group + * leaders who are already counted with the dead tasks + */ first = tgidtsk; - read_lock(&tasklist_lock); if (!first) { + read_lock(&tasklist_lock); first = find_task_by_pid(tgid); if (!first) { read_unlock(&tasklist_lock); return -ESRCH; } - } + get_task_struct(first); + read_unlock(&tasklist_lock); + } else + get_task_struct(first); + + /* Start with stats from dead tasks */ + spin_lock_irqsave(&first->signal->stats_lock, flags); + if (first->signal->stats) + memcpy(stats, first->signal->stats, sizeof(*stats)); + spin_unlock_irqrestore(&first->signal->stats_lock, flags); + tsk = first; + read_lock(&tasklist_lock); do { + if (tsk->exit_state == EXIT_ZOMBIE && thread_group_leader(tsk)) + continue; /* - * Each accounting subsystem adds calls its functions to + * Accounting subsystem can call its functions here to * fill in relevant parts of struct taskstsats as follows * - * rc = per-task-foo(stats, tsk); - * if (rc) - * break; + * per-task-foo(stats, tsk); */ - - rc = delayacct_add_tsk(stats, tsk); - if (rc) - break; + delayacct_add_tsk(stats, tsk); } while_each_thread(first, tsk); read_unlock(&tasklist_lock); stats->version = TASKSTATS_VERSION; - /* - * Accounting subsytems can also add calls here if they don't - * wish to aggregate statistics for per-tgid stats + * Accounting subsytems can also add calls here to modify + * fields of taskstats. 
*/ - return rc; + return 0; +} + + +static void fill_tgid_exit(struct task_struct *tsk) +{ + unsigned long flags; + + spin_lock_irqsave(&tsk->signal->stats_lock, flags); + if (!tsk->signal->stats) + goto ret; + + /* + * Each accounting subsystem calls its functions here to + * accumalate its per-task stats for tsk, into the per-tgid structure + * + * per-task-foo(tsk->signal->stats, tsk); + */ + delayacct_add_tsk(tsk->signal->stats, tsk); +ret: + spin_unlock_irqrestore(&tsk->signal->stats_lock, flags); + return; } + static int taskstats_send_stats(struct sk_buff *skb, struct genl_info *info) { int rc = 0; @@ -230,7 +263,7 @@ err: /* Send pid data out on exit */ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, - struct taskstats *tgidstats) + int group_dead) { int rc; struct sk_buff *rep_skb; @@ -238,13 +271,16 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, size_t size; int is_thread_group; struct nlattr *na; + unsigned long flags; if (!family_registered || !tidstats) return; - is_thread_group = !thread_group_empty(tsk); - rc = 0; + spin_lock_irqsave(&tsk->signal->stats_lock, flags); + is_thread_group = tsk->signal->stats ? 1 : 0; + spin_unlock_irqrestore(&tsk->signal->stats_lock, flags); + rc = 0; /* * Size includes space for nested attributes */ @@ -268,30 +304,28 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, *tidstats); nla_nest_end(rep_skb, na); - if (!is_thread_group || !tgidstats) { - send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST); - goto ret; - } + if (!is_thread_group) + goto send; - rc = fill_tgid(tsk->pid, tsk, tgidstats); /* - * If fill_tgid() failed then one probable reason could be that the - * thread group leader has exited. fill_tgid() will fail, send out - * the pid statistics collected earlier. 
+ * tsk has/had a thread group so fill the tsk->signal->stats structure + * Doesn't matter if tsk is the leader or the last group member leaving */ - if (rc < 0) { - send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST); - goto ret; - } + + fill_tgid_exit(tsk); + if (!group_dead) + goto send; na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID); NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, (u32)tsk->tgid); + /* No locking needed for tsk->signal->stats since group is dead */ NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS, - *tgidstats); + *tsk->signal->stats); nla_nest_end(rep_skb, na); +send: send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST); - goto ret; + return; nla_put_failure: genlmsg_cancel(rep_skb, reply); |