summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorShailabh Nagar <nagar@watson.ibm.com>2006-07-14 00:24:44 -0700
committerLinus Torvalds <torvalds@g5.osdl.org>2006-07-14 21:53:57 -0700
commitad4ecbcba72855a2b5319b96e2a3a65ed1ca3bfd (patch)
treea2f5b98598948525de77ab594e4432f09a230388 /kernel
parent25890454667b3295f67b3372352be90705f8667c (diff)
downloadlwn-ad4ecbcba72855a2b5319b96e2a3a65ed1ca3bfd.tar.gz
lwn-ad4ecbcba72855a2b5319b96e2a3a65ed1ca3bfd.zip
[PATCH] delay accounting taskstats interface send tgid once
Send per-tgid data only once during exit of a thread group instead of once with each member thread exit. Currently, when a thread exits, besides its per-tid data, the per-tgid data of its thread group is also sent out, if its thread group is non-empty. The per-tgid data sent consists of the sum of per-tid stats for all *remaining* threads of the thread group. This patch modifies this sending in two ways: - the per-tgid data is sent only when the last thread of a thread group exits. This cuts down heavily on the overhead of sending/receiving per-tgid data, especially when other exploiters of the taskstats interface aren't interested in per-tgid stats - the semantics of the per-tgid data sent are changed. Instead of being the sum of per-tid data for remaining threads, the value now sent is the true total accumulated statistics for all threads that are/were part of the thread group. The patch also addresses a minor issue where failure of one accounting subsystem to fill in the taskstats structure was causing the taskstats not to be sent at all. The patch has been tested for stability and has run cerberus for over 4 hours on an SMP system. [akpm@osdl.org: bugfixes] Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com> Signed-off-by: Balbir Singh <balbir@in.ibm.com> Cc: Jay Lan <jlan@engr.sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/exit.c8
-rw-r--r--kernel/fork.c4
-rw-r--r--kernel/taskstats.c98
3 files changed, 74 insertions, 36 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 9852ed8c2988..67c1e9a4f812 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -845,7 +845,7 @@ static void exit_notify(struct task_struct *tsk)
fastcall NORET_TYPE void do_exit(long code)
{
struct task_struct *tsk = current;
- struct taskstats *tidstats, *tgidstats;
+ struct taskstats *tidstats;
int group_dead;
profile_task_exit(tsk);
@@ -884,7 +884,7 @@ fastcall NORET_TYPE void do_exit(long code)
current->comm, current->pid,
preempt_count());
- taskstats_exit_alloc(&tidstats, &tgidstats);
+ taskstats_exit_alloc(&tidstats);
acct_update_integrals(tsk);
if (tsk->mm) {
@@ -905,8 +905,8 @@ fastcall NORET_TYPE void do_exit(long code)
#endif
if (unlikely(tsk->audit_context))
audit_free(tsk);
- taskstats_exit_send(tsk, tidstats, tgidstats);
- taskstats_exit_free(tidstats, tgidstats);
+ taskstats_exit_send(tsk, tidstats, group_dead);
+ taskstats_exit_free(tidstats);
delayacct_tsk_exit(tsk);
exit_mm(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index 451cfd35bf22..1b0f7b1e0881 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -44,6 +44,7 @@
#include <linux/acct.h>
#include <linux/cn_proc.h>
#include <linux/delayacct.h>
+#include <linux/taskstats_kern.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -819,6 +820,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
if (clone_flags & CLONE_THREAD) {
atomic_inc(&current->signal->count);
atomic_inc(&current->signal->live);
+ taskstats_tgid_alloc(current->signal);
return 0;
}
sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
@@ -863,6 +865,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
INIT_LIST_HEAD(&sig->cpu_timers[0]);
INIT_LIST_HEAD(&sig->cpu_timers[1]);
INIT_LIST_HEAD(&sig->cpu_timers[2]);
+ taskstats_tgid_init(sig);
task_lock(current->group_leader);
memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
@@ -884,6 +887,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
void __cleanup_signal(struct signal_struct *sig)
{
exit_thread_group_keys(sig);
+ taskstats_tgid_free(sig);
kmem_cache_free(signal_cachep, sig);
}
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index ea9506de3b85..4a0a5022b299 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -132,46 +132,79 @@ static int fill_pid(pid_t pid, struct task_struct *pidtsk,
static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk,
struct taskstats *stats)
{
- int rc;
struct task_struct *tsk, *first;
+ unsigned long flags;
+ /*
+ * Add additional stats from live tasks except zombie thread group
+ * leaders who are already counted with the dead tasks
+ */
first = tgidtsk;
- read_lock(&tasklist_lock);
if (!first) {
+ read_lock(&tasklist_lock);
first = find_task_by_pid(tgid);
if (!first) {
read_unlock(&tasklist_lock);
return -ESRCH;
}
- }
+ get_task_struct(first);
+ read_unlock(&tasklist_lock);
+ } else
+ get_task_struct(first);
+
+ /* Start with stats from dead tasks */
+ spin_lock_irqsave(&first->signal->stats_lock, flags);
+ if (first->signal->stats)
+ memcpy(stats, first->signal->stats, sizeof(*stats));
+ spin_unlock_irqrestore(&first->signal->stats_lock, flags);
+
tsk = first;
+ read_lock(&tasklist_lock);
do {
+ if (tsk->exit_state == EXIT_ZOMBIE && thread_group_leader(tsk))
+ continue;
/*
- * Each accounting subsystem adds calls its functions to
+ * Accounting subsystem can call its functions here to
* fill in relevant parts of struct taskstsats as follows
*
- * rc = per-task-foo(stats, tsk);
- * if (rc)
- * break;
+ * per-task-foo(stats, tsk);
*/
-
- rc = delayacct_add_tsk(stats, tsk);
- if (rc)
- break;
+ delayacct_add_tsk(stats, tsk);
} while_each_thread(first, tsk);
read_unlock(&tasklist_lock);
stats->version = TASKSTATS_VERSION;
-
/*
- * Accounting subsytems can also add calls here if they don't
- * wish to aggregate statistics for per-tgid stats
+ * Accounting subsytems can also add calls here to modify
+ * fields of taskstats.
*/
- return rc;
+ return 0;
+}
+
+
+static void fill_tgid_exit(struct task_struct *tsk)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&tsk->signal->stats_lock, flags);
+ if (!tsk->signal->stats)
+ goto ret;
+
+ /*
+ * Each accounting subsystem calls its functions here to
+ * accumalate its per-task stats for tsk, into the per-tgid structure
+ *
+ * per-task-foo(tsk->signal->stats, tsk);
+ */
+ delayacct_add_tsk(tsk->signal->stats, tsk);
+ret:
+ spin_unlock_irqrestore(&tsk->signal->stats_lock, flags);
+ return;
}
+
static int taskstats_send_stats(struct sk_buff *skb, struct genl_info *info)
{
int rc = 0;
@@ -230,7 +263,7 @@ err:
/* Send pid data out on exit */
void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
- struct taskstats *tgidstats)
+ int group_dead)
{
int rc;
struct sk_buff *rep_skb;
@@ -238,13 +271,16 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
size_t size;
int is_thread_group;
struct nlattr *na;
+ unsigned long flags;
if (!family_registered || !tidstats)
return;
- is_thread_group = !thread_group_empty(tsk);
- rc = 0;
+ spin_lock_irqsave(&tsk->signal->stats_lock, flags);
+ is_thread_group = tsk->signal->stats ? 1 : 0;
+ spin_unlock_irqrestore(&tsk->signal->stats_lock, flags);
+ rc = 0;
/*
* Size includes space for nested attributes
*/
@@ -268,30 +304,28 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
*tidstats);
nla_nest_end(rep_skb, na);
- if (!is_thread_group || !tgidstats) {
- send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST);
- goto ret;
- }
+ if (!is_thread_group)
+ goto send;
- rc = fill_tgid(tsk->pid, tsk, tgidstats);
/*
- * If fill_tgid() failed then one probable reason could be that the
- * thread group leader has exited. fill_tgid() will fail, send out
- * the pid statistics collected earlier.
+ * tsk has/had a thread group so fill the tsk->signal->stats structure
+ * Doesn't matter if tsk is the leader or the last group member leaving
*/
- if (rc < 0) {
- send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST);
- goto ret;
- }
+
+ fill_tgid_exit(tsk);
+ if (!group_dead)
+ goto send;
na = nla_nest_start(rep_skb, TASKSTATS_TYPE_AGGR_TGID);
NLA_PUT_U32(rep_skb, TASKSTATS_TYPE_TGID, (u32)tsk->tgid);
+ /* No locking needed for tsk->signal->stats since group is dead */
NLA_PUT_TYPE(rep_skb, struct taskstats, TASKSTATS_TYPE_STATS,
- *tgidstats);
+ *tsk->signal->stats);
nla_nest_end(rep_skb, na);
+send:
send_reply(rep_skb, 0, TASKSTATS_MSG_MULTICAST);
- goto ret;
+ return;
nla_put_failure:
genlmsg_cancel(rep_skb, reply);