summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-12-11 20:47:30 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2009-12-11 20:47:30 -0800
commit6f696eb17be741668810fe1f798135c7cf6733e2 (patch)
treef9bcfe5831dfcaaad50ca68d7f04d80d8236fa56 /kernel
parentc4e194e3b71ff4fed01d727c32ee1071921d28a3 (diff)
parent125580380f418000b1a06d9a54700f1191b6e561 (diff)
downloadlwn-6f696eb17be741668810fe1f798135c7cf6733e2.tar.gz
lwn-6f696eb17be741668810fe1f798135c7cf6733e2.zip
Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (57 commits) x86, perf events: Check if we have APIC enabled perf_event: Fix variable initialization in other codepaths perf kmem: Fix unused argument build warning perf symbols: perf_header__read_build_ids() offset'n'size should be u64 perf symbols: dsos__read_build_ids() should read both user and kernel buildids perf tools: Align long options which have no short forms perf kmem: Show usage if no option is specified sched: Mark sched_clock() as notrace perf sched: Add max delay time snapshot perf tools: Correct size given to memset perf_event: Fix perf_swevent_hrtimer() variable initialization perf sched: Fix for getting task's execution time tracing/kprobes: Fix field creation's bad error handling perf_event: Cleanup for cpu_clock_perf_event_update() perf_event: Allocate children's perf_event_ctxp at the right time perf_event: Clean up __perf_event_init_context() hw-breakpoints: Modify breakpoints without unregistering them perf probe: Update perf-probe document perf probe: Support --del option trace-kprobe: Support delete probe syntax ...
Diffstat (limited to 'kernel')
-rw-r--r--kernel/hw_breakpoint.c146
-rw-r--r--kernel/perf_event.c75
-rw-r--r--kernel/trace/trace_kprobe.c41
-rw-r--r--kernel/trace/trace_ksym.c5
4 files changed, 159 insertions, 108 deletions
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index cf5ee1628411..366eedf949c0 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -52,7 +52,7 @@
static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
/* Number of pinned task breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]);
+static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[HBP_NUM]);
/* Number of non-pinned cpu/task breakpoints in a cpu */
static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
@@ -73,7 +73,7 @@ static DEFINE_MUTEX(nr_bp_mutex);
static unsigned int max_task_bp_pinned(int cpu)
{
int i;
- unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu);
+ unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
for (i = HBP_NUM -1; i >= 0; i--) {
if (tsk_pinned[i] > 0)
@@ -83,15 +83,51 @@ static unsigned int max_task_bp_pinned(int cpu)
return 0;
}
+static int task_bp_pinned(struct task_struct *tsk)
+{
+ struct perf_event_context *ctx = tsk->perf_event_ctxp;
+ struct list_head *list;
+ struct perf_event *bp;
+ unsigned long flags;
+ int count = 0;
+
+ if (WARN_ONCE(!ctx, "No perf context for this task"))
+ return 0;
+
+ list = &ctx->event_list;
+
+ spin_lock_irqsave(&ctx->lock, flags);
+
+ /*
+ * The current breakpoint counter is not included in the list
+ * at the open() callback time
+ */
+ list_for_each_entry(bp, list, event_entry) {
+ if (bp->attr.type == PERF_TYPE_BREAKPOINT)
+ count++;
+ }
+
+ spin_unlock_irqrestore(&ctx->lock, flags);
+
+ return count;
+}
+
/*
* Report the number of pinned/un-pinned breakpoints we have in
* a given cpu (cpu > -1) or in all of them (cpu = -1).
*/
-static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
+static void
+fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
{
+ int cpu = bp->cpu;
+ struct task_struct *tsk = bp->ctx->task;
+
if (cpu >= 0) {
slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
- slots->pinned += max_task_bp_pinned(cpu);
+ if (!tsk)
+ slots->pinned += max_task_bp_pinned(cpu);
+ else
+ slots->pinned += task_bp_pinned(tsk);
slots->flexible = per_cpu(nr_bp_flexible, cpu);
return;
@@ -101,7 +137,10 @@ static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
unsigned int nr;
nr = per_cpu(nr_cpu_bp_pinned, cpu);
- nr += max_task_bp_pinned(cpu);
+ if (!tsk)
+ nr += max_task_bp_pinned(cpu);
+ else
+ nr += task_bp_pinned(tsk);
if (nr > slots->pinned)
slots->pinned = nr;
@@ -118,35 +157,12 @@ static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
*/
static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
{
- int count = 0;
- struct perf_event *bp;
- struct perf_event_context *ctx = tsk->perf_event_ctxp;
unsigned int *tsk_pinned;
- struct list_head *list;
- unsigned long flags;
-
- if (WARN_ONCE(!ctx, "No perf context for this task"))
- return;
-
- list = &ctx->event_list;
-
- spin_lock_irqsave(&ctx->lock, flags);
-
- /*
- * The current breakpoint counter is not included in the list
- * at the open() callback time
- */
- list_for_each_entry(bp, list, event_entry) {
- if (bp->attr.type == PERF_TYPE_BREAKPOINT)
- count++;
- }
+ int count = 0;
- spin_unlock_irqrestore(&ctx->lock, flags);
+ count = task_bp_pinned(tsk);
- if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list"))
- return;
-
- tsk_pinned = per_cpu(task_bp_pinned, cpu);
+ tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
if (enable) {
tsk_pinned[count]++;
if (count > 0)
@@ -193,7 +209,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
* - If attached to a single cpu, check:
*
* (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu)
- * + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM
+ * + max(per_cpu(nr_task_bp_pinned, cpu)))) < HBP_NUM
*
* -> If there are already non-pinned counters in this cpu, it means
* there is already a free slot for them.
@@ -204,7 +220,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
* - If attached to every cpus, check:
*
* (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *))
- * + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM
+ * + max(per_cpu(nr_task_bp_pinned, *)))) < HBP_NUM
*
* -> This is roughly the same, except we check the number of per cpu
* bp for every cpu and we keep the max one. Same for the per tasks
@@ -216,7 +232,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
* - If attached to a single cpu, check:
*
* ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu)
- * + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM
+ * + max(per_cpu(nr_task_bp_pinned, cpu))) < HBP_NUM
*
* -> Same checks as before. But now the nr_bp_flexible, if any, must keep
* one register at least (or they will never be fed).
@@ -224,7 +240,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
* - If attached to every cpus, check:
*
* ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
- * + max(per_cpu(task_bp_pinned, *))) < HBP_NUM
+ * + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM
*/
int reserve_bp_slot(struct perf_event *bp)
{
@@ -233,7 +249,7 @@ int reserve_bp_slot(struct perf_event *bp)
mutex_lock(&nr_bp_mutex);
- fetch_bp_busy_slots(&slots, bp->cpu);
+ fetch_bp_busy_slots(&slots, bp);
/* Flexible counters need to keep at least one slot */
if (slots.pinned + (!!slots.flexible) == HBP_NUM) {
@@ -259,7 +275,7 @@ void release_bp_slot(struct perf_event *bp)
}
-int __register_perf_hw_breakpoint(struct perf_event *bp)
+int register_perf_hw_breakpoint(struct perf_event *bp)
{
int ret;
@@ -276,19 +292,12 @@ int __register_perf_hw_breakpoint(struct perf_event *bp)
* This is a quick hack that will be removed soon, once we remove
* the tmp breakpoints from ptrace
*/
- if (!bp->attr.disabled || bp->callback == perf_bp_event)
+ if (!bp->attr.disabled || !bp->overflow_handler)
ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
return ret;
}
-int register_perf_hw_breakpoint(struct perf_event *bp)
-{
- bp->callback = perf_bp_event;
-
- return __register_perf_hw_breakpoint(bp);
-}
-
/**
* register_user_hw_breakpoint - register a hardware breakpoint for user space
* @attr: breakpoint attributes
@@ -297,7 +306,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
*/
struct perf_event *
register_user_hw_breakpoint(struct perf_event_attr *attr,
- perf_callback_t triggered,
+ perf_overflow_handler_t triggered,
struct task_struct *tsk)
{
return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
@@ -311,19 +320,40 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
* @triggered: callback to trigger when we hit the breakpoint
* @tsk: pointer to 'task_struct' of the process to which the address belongs
*/
-struct perf_event *
-modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr,
- perf_callback_t triggered,
- struct task_struct *tsk)
+int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
{
- /*
- * FIXME: do it without unregistering
- * - We don't want to lose our slot
- * - If the new bp is incorrect, don't lose the older one
- */
- unregister_hw_breakpoint(bp);
+ u64 old_addr = bp->attr.bp_addr;
+ int old_type = bp->attr.bp_type;
+ int old_len = bp->attr.bp_len;
+ int err = 0;
- return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
+ perf_event_disable(bp);
+
+ bp->attr.bp_addr = attr->bp_addr;
+ bp->attr.bp_type = attr->bp_type;
+ bp->attr.bp_len = attr->bp_len;
+
+ if (attr->disabled)
+ goto end;
+
+ err = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+ if (!err)
+ perf_event_enable(bp);
+
+ if (err) {
+ bp->attr.bp_addr = old_addr;
+ bp->attr.bp_type = old_type;
+ bp->attr.bp_len = old_len;
+ if (!bp->attr.disabled)
+ perf_event_enable(bp);
+
+ return err;
+ }
+
+end:
+ bp->attr.disabled = attr->disabled;
+
+ return 0;
}
EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
@@ -348,7 +378,7 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
*/
struct perf_event **
register_wide_hw_breakpoint(struct perf_event_attr *attr,
- perf_callback_t triggered)
+ perf_overflow_handler_t triggered)
{
struct perf_event **cpu_events, **pevent, *bp;
long err;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 40a996ec39fa..e73e53c7582f 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -36,7 +36,7 @@
/*
* Each CPU has a list of per CPU events:
*/
-DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
+static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
int perf_max_events __read_mostly = 1;
static int perf_reserved_percpu __read_mostly;
@@ -567,7 +567,7 @@ static void __perf_event_disable(void *info)
* is the current context on this CPU and preemption is disabled,
* hence we can't get into perf_event_task_sched_out for this context.
*/
-static void perf_event_disable(struct perf_event *event)
+void perf_event_disable(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
struct task_struct *task = ctx->task;
@@ -971,7 +971,7 @@ static void __perf_event_enable(void *info)
* perf_event_for_each_child or perf_event_for_each as described
* for perf_event_disable.
*/
-static void perf_event_enable(struct perf_event *event)
+void perf_event_enable(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
struct task_struct *task = ctx->task;
@@ -1579,7 +1579,6 @@ static void
__perf_event_init_context(struct perf_event_context *ctx,
struct task_struct *task)
{
- memset(ctx, 0, sizeof(*ctx));
spin_lock_init(&ctx->lock);
mutex_init(&ctx->mutex);
INIT_LIST_HEAD(&ctx->group_list);
@@ -1654,7 +1653,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
}
if (!ctx) {
- ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL);
+ ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL);
err = -ENOMEM;
if (!ctx)
goto errout;
@@ -4011,6 +4010,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
event->pmu->read(event);
data.addr = 0;
+ data.raw = NULL;
data.period = event->hw.last_period;
regs = get_irq_regs();
/*
@@ -4080,8 +4080,7 @@ static void cpu_clock_perf_event_update(struct perf_event *event)
u64 now;
now = cpu_clock(cpu);
- prev = atomic64_read(&event->hw.prev_count);
- atomic64_set(&event->hw.prev_count, now);
+ prev = atomic64_xchg(&event->hw.prev_count, now);
atomic64_add(now - prev, &event->count);
}
@@ -4286,15 +4285,8 @@ static void bp_perf_event_destroy(struct perf_event *event)
static const struct pmu *bp_perf_event_init(struct perf_event *bp)
{
int err;
- /*
- * The breakpoint is already filled if we haven't created the counter
- * through perf syscall
- * FIXME: manage to get trigerred to NULL if it comes from syscalls
- */
- if (!bp->callback)
- err = register_perf_hw_breakpoint(bp);
- else
- err = __register_perf_hw_breakpoint(bp);
+
+ err = register_perf_hw_breakpoint(bp);
if (err)
return ERR_PTR(err);
@@ -4308,6 +4300,7 @@ void perf_bp_event(struct perf_event *bp, void *data)
struct perf_sample_data sample;
struct pt_regs *regs = data;
+ sample.raw = NULL;
sample.addr = bp->attr.bp_addr;
if (!perf_exclude_event(bp, regs))
@@ -4390,7 +4383,7 @@ perf_event_alloc(struct perf_event_attr *attr,
struct perf_event_context *ctx,
struct perf_event *group_leader,
struct perf_event *parent_event,
- perf_callback_t callback,
+ perf_overflow_handler_t overflow_handler,
gfp_t gfpflags)
{
const struct pmu *pmu;
@@ -4433,10 +4426,10 @@ perf_event_alloc(struct perf_event_attr *attr,
event->state = PERF_EVENT_STATE_INACTIVE;
- if (!callback && parent_event)
- callback = parent_event->callback;
+ if (!overflow_handler && parent_event)
+ overflow_handler = parent_event->overflow_handler;
- event->callback = callback;
+ event->overflow_handler = overflow_handler;
if (attr->disabled)
event->state = PERF_EVENT_STATE_OFF;
@@ -4776,7 +4769,8 @@ err_put_context:
*/
struct perf_event *
perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
- pid_t pid, perf_callback_t callback)
+ pid_t pid,
+ perf_overflow_handler_t overflow_handler)
{
struct perf_event *event;
struct perf_event_context *ctx;
@@ -4793,7 +4787,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
}
event = perf_event_alloc(attr, cpu, ctx, NULL,
- NULL, callback, GFP_KERNEL);
+ NULL, overflow_handler, GFP_KERNEL);
if (IS_ERR(event)) {
err = PTR_ERR(event);
goto err_put_context;
@@ -5090,7 +5084,7 @@ again:
*/
int perf_event_init_task(struct task_struct *child)
{
- struct perf_event_context *child_ctx, *parent_ctx;
+ struct perf_event_context *child_ctx = NULL, *parent_ctx;
struct perf_event_context *cloned_ctx;
struct perf_event *event;
struct task_struct *parent = current;
@@ -5106,20 +5100,6 @@ int perf_event_init_task(struct task_struct *child)
return 0;
/*
- * This is executed from the parent task context, so inherit
- * events that have been marked for cloning.
- * First allocate and initialize a context for the child.
- */
-
- child_ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL);
- if (!child_ctx)
- return -ENOMEM;
-
- __perf_event_init_context(child_ctx, child);
- child->perf_event_ctxp = child_ctx;
- get_task_struct(child);
-
- /*
* If the parent's context is a clone, pin it so it won't get
* swapped under us.
*/
@@ -5149,6 +5129,26 @@ int perf_event_init_task(struct task_struct *child)
continue;
}
+ if (!child->perf_event_ctxp) {
+ /*
+ * This is executed from the parent task context, so
+ * inherit events that have been marked for cloning.
+ * First allocate and initialize a context for the
+ * child.
+ */
+
+ child_ctx = kzalloc(sizeof(struct perf_event_context),
+ GFP_KERNEL);
+ if (!child_ctx) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ __perf_event_init_context(child_ctx, child);
+ child->perf_event_ctxp = child_ctx;
+ get_task_struct(child);
+ }
+
ret = inherit_group(event, parent, parent_ctx,
child, child_ctx);
if (ret) {
@@ -5177,6 +5177,7 @@ int perf_event_init_task(struct task_struct *child)
get_ctx(child_ctx->parent_ctx);
}
+exit:
mutex_unlock(&parent_ctx->mutex);
perf_unpin_context(parent_ctx);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index aff5f80b59b8..b52d397e57eb 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -606,23 +606,22 @@ static int create_trace_probe(int argc, char **argv)
*/
struct trace_probe *tp;
int i, ret = 0;
- int is_return = 0;
+ int is_return = 0, is_delete = 0;
char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL;
unsigned long offset = 0;
void *addr = NULL;
char buf[MAX_EVENT_NAME_LEN];
- if (argc < 2) {
- pr_info("Probe point is not specified.\n");
- return -EINVAL;
- }
-
+ /* argc must be >= 1 */
if (argv[0][0] == 'p')
is_return = 0;
else if (argv[0][0] == 'r')
is_return = 1;
+ else if (argv[0][0] == '-')
+ is_delete = 1;
else {
- pr_info("Probe definition must be started with 'p' or 'r'.\n");
+ pr_info("Probe definition must be started with 'p', 'r' or"
+ " '-'.\n");
return -EINVAL;
}
@@ -642,7 +641,29 @@ static int create_trace_probe(int argc, char **argv)
return -EINVAL;
}
}
+ if (!group)
+ group = KPROBE_EVENT_SYSTEM;
+ if (is_delete) {
+ if (!event) {
+ pr_info("Delete command needs an event name.\n");
+ return -EINVAL;
+ }
+ tp = find_probe_event(event, group);
+ if (!tp) {
+ pr_info("Event %s/%s doesn't exist.\n", group, event);
+ return -ENOENT;
+ }
+ /* delete an event */
+ unregister_trace_probe(tp);
+ free_trace_probe(tp);
+ return 0;
+ }
+
+ if (argc < 2) {
+ pr_info("Probe point is not specified.\n");
+ return -EINVAL;
+ }
if (isdigit(argv[1][0])) {
if (is_return) {
pr_info("Return probe point must be a symbol.\n");
@@ -671,8 +692,6 @@ static int create_trace_probe(int argc, char **argv)
argc -= 2; argv += 2;
/* setup a probe */
- if (!group)
- group = KPROBE_EVENT_SYSTEM;
if (!event) {
/* Make a new event name */
if (symbol)
@@ -1114,7 +1133,7 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
struct trace_probe *tp = (struct trace_probe *)event_call->data;
ret = trace_define_common_fields(event_call);
- if (!ret)
+ if (ret)
return ret;
DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
@@ -1132,7 +1151,7 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
struct trace_probe *tp = (struct trace_probe *)event_call->data;
ret = trace_define_common_fields(event_call);
- if (!ret)
+ if (ret)
return ret;
DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index ddfa0fd43bc0..acb87d4a4ac1 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -79,11 +79,12 @@ void ksym_collect_stats(unsigned long hbp_hit_addr)
}
#endif /* CONFIG_PROFILE_KSYM_TRACER */
-void ksym_hbp_handler(struct perf_event *hbp, void *data)
+void ksym_hbp_handler(struct perf_event *hbp, int nmi,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
{
struct ring_buffer_event *event;
struct ksym_trace_entry *entry;
- struct pt_regs *regs = data;
struct ring_buffer *buffer;
int pc;