summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSteven Rostedt (VMware) <rostedt@goodmis.org>2018-07-09 17:48:54 -0400
committerTejun Heo <tj@kernel.org>2018-07-11 10:48:47 -0700
commite4f8d81c738db6d3ffdabfb8329aa2feaa310699 (patch)
tree97153959e7c625bc49850709149a1faa5146a70e
parent1e09177acae32a61586af26d83ca5ef591cdcaf5 (diff)
downloadlwn-e4f8d81c738db6d3ffdabfb8329aa2feaa310699.tar.gz
lwn-e4f8d81c738db6d3ffdabfb8329aa2feaa310699.zip
cgroup/tracing: Move taking of spin lock out of trace event handlers
It is unwise to take spin locks from the handlers of trace events. Mainly, because they can introduce lockups, because it introduces locks in places that are normally not tested. Worse yet, because trace events are tucked away in the include/trace/events/ directory, locks that are taken there are forgotten about. As a general rule, I tell people never to take any locks in a trace event handler. Several cgroup trace event handlers call cgroup_path() which eventually takes the kernfs_rename_lock spinlock. This injects the spinlock in the code without people realizing it. It also can cause issues for the PREEMPT_RT patch, as the spinlock becomes a mutex, and the trace event handlers are called with preemption disabled. By moving the calculation of the cgroup_path() out of the trace event handlers and into a macro (surrounded by a trace_cgroup_##type##_enabled()), then we could place the cgroup_path into a string, and pass that to the trace event. Not only does this remove the taking of the spinlock out of the trace event handler, but it also means that the cgroup_path() only needs to be called once (it is currently called twice, once to get the length to reserver the buffer for, and once again to get the path itself. Now it only needs to be done once. Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org> Signed-off-by: Tejun Heo <tj@kernel.org>
-rw-r--r--include/trace/events/cgroup.h47
-rw-r--r--kernel/cgroup/cgroup-internal.h26
-rw-r--r--kernel/cgroup/cgroup-v1.c4
-rw-r--r--kernel/cgroup/cgroup.c12
4 files changed, 58 insertions, 31 deletions
diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h
index d74722c2ac8b..a401ff5e7847 100644
--- a/include/trace/events/cgroup.h
+++ b/include/trace/events/cgroup.h
@@ -53,24 +53,22 @@ DEFINE_EVENT(cgroup_root, cgroup_remount,
DECLARE_EVENT_CLASS(cgroup,
- TP_PROTO(struct cgroup *cgrp),
+ TP_PROTO(struct cgroup *cgrp, const char *path),
- TP_ARGS(cgrp),
+ TP_ARGS(cgrp, path),
TP_STRUCT__entry(
__field( int, root )
__field( int, id )
__field( int, level )
- __dynamic_array(char, path,
- cgroup_path(cgrp, NULL, 0) + 1)
+ __string( path, path )
),
TP_fast_assign(
__entry->root = cgrp->root->hierarchy_id;
__entry->id = cgrp->id;
__entry->level = cgrp->level;
- cgroup_path(cgrp, __get_dynamic_array(path),
- __get_dynamic_array_len(path));
+ __assign_str(path, path);
),
TP_printk("root=%d id=%d level=%d path=%s",
@@ -79,45 +77,45 @@ DECLARE_EVENT_CLASS(cgroup,
DEFINE_EVENT(cgroup, cgroup_mkdir,
- TP_PROTO(struct cgroup *cgroup),
+ TP_PROTO(struct cgroup *cgrp, const char *path),
- TP_ARGS(cgroup)
+ TP_ARGS(cgrp, path)
);
DEFINE_EVENT(cgroup, cgroup_rmdir,
- TP_PROTO(struct cgroup *cgroup),
+ TP_PROTO(struct cgroup *cgrp, const char *path),
- TP_ARGS(cgroup)
+ TP_ARGS(cgrp, path)
);
DEFINE_EVENT(cgroup, cgroup_release,
- TP_PROTO(struct cgroup *cgroup),
+ TP_PROTO(struct cgroup *cgrp, const char *path),
- TP_ARGS(cgroup)
+ TP_ARGS(cgrp, path)
);
DEFINE_EVENT(cgroup, cgroup_rename,
- TP_PROTO(struct cgroup *cgroup),
+ TP_PROTO(struct cgroup *cgrp, const char *path),
- TP_ARGS(cgroup)
+ TP_ARGS(cgrp, path)
);
DECLARE_EVENT_CLASS(cgroup_migrate,
- TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup),
+ TP_PROTO(struct cgroup *dst_cgrp, const char *path,
+ struct task_struct *task, bool threadgroup),
- TP_ARGS(dst_cgrp, task, threadgroup),
+ TP_ARGS(dst_cgrp, path, task, threadgroup),
TP_STRUCT__entry(
__field( int, dst_root )
__field( int, dst_id )
__field( int, dst_level )
- __dynamic_array(char, dst_path,
- cgroup_path(dst_cgrp, NULL, 0) + 1)
__field( int, pid )
+ __string( dst_path, path )
__string( comm, task->comm )
),
@@ -125,8 +123,7 @@ DECLARE_EVENT_CLASS(cgroup_migrate,
__entry->dst_root = dst_cgrp->root->hierarchy_id;
__entry->dst_id = dst_cgrp->id;
__entry->dst_level = dst_cgrp->level;
- cgroup_path(dst_cgrp, __get_dynamic_array(dst_path),
- __get_dynamic_array_len(dst_path));
+ __assign_str(dst_path, path);
__entry->pid = task->pid;
__assign_str(comm, task->comm);
),
@@ -138,16 +135,18 @@ DECLARE_EVENT_CLASS(cgroup_migrate,
DEFINE_EVENT(cgroup_migrate, cgroup_attach_task,
- TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup),
+ TP_PROTO(struct cgroup *dst_cgrp, const char *path,
+ struct task_struct *task, bool threadgroup),
- TP_ARGS(dst_cgrp, task, threadgroup)
+ TP_ARGS(dst_cgrp, path, task, threadgroup)
);
DEFINE_EVENT(cgroup_migrate, cgroup_transfer_tasks,
- TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup),
+ TP_PROTO(struct cgroup *dst_cgrp, const char *path,
+ struct task_struct *task, bool threadgroup),
- TP_ARGS(dst_cgrp, task, threadgroup)
+ TP_ARGS(dst_cgrp, path, task, threadgroup)
);
#endif /* _TRACE_CGROUP_H */
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index 77ff1cd6a252..75568fcf2180 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -8,6 +8,32 @@
#include <linux/list.h>
#include <linux/refcount.h>
+#define TRACE_CGROUP_PATH_LEN 1024
+extern spinlock_t trace_cgroup_path_lock;
+extern char trace_cgroup_path[TRACE_CGROUP_PATH_LEN];
+
+/*
+ * cgroup_path() takes a spin lock. It is good practice not to take
+ * spin locks within trace point handlers, as they are mostly hidden
+ * from normal view. As cgroup_path() can take the kernfs_rename_lock
+ * spin lock, it is best to not call that function from the trace event
+ * handler.
+ *
+ * Note: trace_cgroup_##type##_enabled() is a static branch that will only
+ * be set when the trace event is enabled.
+ */
+#define TRACE_CGROUP_PATH(type, cgrp, ...) \
+ do { \
+ if (trace_cgroup_##type##_enabled()) { \
+ spin_lock(&trace_cgroup_path_lock); \
+ cgroup_path(cgrp, trace_cgroup_path, \
+ TRACE_CGROUP_PATH_LEN); \
+ trace_cgroup_##type(cgrp, trace_cgroup_path, \
+ ##__VA_ARGS__); \
+ spin_unlock(&trace_cgroup_path_lock); \
+ } \
+ } while (0)
+
/*
* A cgroup can be associated with multiple css_sets as different tasks may
* belong to different cgroups on different hierarchies. In the other
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 8b4f0768efd6..51063e7a93c2 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -135,7 +135,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
if (task) {
ret = cgroup_migrate(task, false, &mgctx);
if (!ret)
- trace_cgroup_transfer_tasks(to, task, false);
+ TRACE_CGROUP_PATH(transfer_tasks, to, task, false);
put_task_struct(task);
}
} while (task && !ret);
@@ -865,7 +865,7 @@ static int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent
ret = kernfs_rename(kn, new_parent, new_name_str);
if (!ret)
- trace_cgroup_rename(cgrp);
+ TRACE_CGROUP_PATH(rename, cgrp);
mutex_unlock(&cgroup_mutex);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 077370bf8964..4a439de621bd 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -83,6 +83,9 @@ EXPORT_SYMBOL_GPL(cgroup_mutex);
EXPORT_SYMBOL_GPL(css_set_lock);
#endif
+DEFINE_SPINLOCK(trace_cgroup_path_lock);
+char trace_cgroup_path[TRACE_CGROUP_PATH_LEN];
+
/*
* Protects cgroup_idr and css_idr so that IDs can be released without
* grabbing cgroup_mutex.
@@ -2638,7 +2641,7 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
cgroup_migrate_finish(&mgctx);
if (!ret)
- trace_cgroup_attach_task(dst_cgrp, leader, threadgroup);
+ TRACE_CGROUP_PATH(attach_task, dst_cgrp, leader, threadgroup);
return ret;
}
@@ -4634,7 +4637,7 @@ static void css_release_work_fn(struct work_struct *work)
struct cgroup *tcgrp;
/* cgroup release path */
- trace_cgroup_release(cgrp);
+ TRACE_CGROUP_PATH(release, cgrp);
if (cgroup_on_dfl(cgrp))
cgroup_rstat_flush(cgrp);
@@ -4977,7 +4980,7 @@ int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
if (ret)
goto out_destroy;
- trace_cgroup_mkdir(cgrp);
+ TRACE_CGROUP_PATH(mkdir, cgrp);
/* let's create and online css's */
kernfs_activate(kn);
@@ -5165,9 +5168,8 @@ int cgroup_rmdir(struct kernfs_node *kn)
return 0;
ret = cgroup_destroy_locked(cgrp);
-
if (!ret)
- trace_cgroup_rmdir(cgrp);
+ TRACE_CGROUP_PATH(rmdir, cgrp);
cgroup_kn_unlock(kn);
return ret;