summary | refs | log | tree | commit | diff
path: root/kernel
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2020-06-20 13:17:47 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2020-06-20 13:17:47 -0700
commit: 8b6ddd10d678bebec32381f71b6b420bafc43ad0 (patch)
tree: 308c7484988c318dcf7fc464e2d4b75bcb841ba4 /kernel
parent: eede2b9b3fe01168940bb42ff3ab502ef5f6375c (diff)
parent: 026bb845b0fff6dec91fe24511dad7d3067dc3ed (diff)
download: lwn-8b6ddd10d678bebec32381f71b6b420bafc43ad0.tar.gz
download: lwn-8b6ddd10d678bebec32381f71b6b420bafc43ad0.zip
Merge tag 'trace-v5.8-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace
Pull tracing fixes from Steven Rostedt:

 - Have recordmcount work with > 64K sections (to support LTO)
 - kprobe RCU fixes
 - Correct a kprobe critical section with missing mutex
 - Remove redundant arch_disarm_kprobe() call
 - Fix lockup when kretprobe triggers within kprobe_flush_task()
 - Fix memory leak in fetch_op_data operations
 - Fix sleep in atomic in ftrace trace array sample code
 - Free up memory on failure in sample trace array code
 - Fix incorrect reporting of function_graph fields in format file
 - Fix quote within quote parsing in bootconfig
 - Fix return value of bootconfig tool
 - Add testcases for bootconfig tool
 - Fix maybe uninitialized warning in ftrace pid file code
 - Remove unused variable in tracing_iter_reset()
 - Fix some typos

* tag 'trace-v5.8-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace:
  ftrace: Fix maybe-uninitialized compiler warning
  tools/bootconfig: Add testcase for show-command and quotes test
  tools/bootconfig: Fix to return 0 if succeeded to show the bootconfig
  tools/bootconfig: Fix to use correct quotes for value
  proc/bootconfig: Fix to use correct quotes for value
  tracing: Remove unused event variable in tracing_iter_reset
  tracing/probe: Fix memleak in fetch_op_data operations
  trace: Fix typo in allocate_ftrace_ops()'s comment
  tracing: Make ftrace packed events have align of 1
  sample-trace-array: Remove trace_array 'sample-instance'
  sample-trace-array: Fix sleeping function called from invalid context
  kretprobe: Prevent triggering kretprobe from within kprobe_flush_task
  kprobes: Remove redundant arch_disarm_kprobe() call
  kprobes: Fix to protect kick_kprobe_optimizer() by kprobe_mutex
  kprobes: Use non RCU traversal APIs on kprobe_tables if possible
  kprobes: Suppress the suspicious RCU warning on kprobes
  recordmcount: support >64k sections
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/kprobes.c 61
-rw-r--r-- kernel/trace/ftrace.c 12
-rw-r--r-- kernel/trace/trace.c 3
-rw-r--r-- kernel/trace/trace.h 3
-rw-r--r-- kernel/trace/trace_entries.h 14
-rw-r--r-- kernel/trace/trace_export.c 16
-rw-r--r-- kernel/trace/trace_functions.c 2
-rw-r--r-- kernel/trace/trace_probe.c 4
8 files changed, 88 insertions, 27 deletions
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 50cd84f53df0..4a904cc56d68 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -46,6 +46,11 @@
static int kprobes_initialized;
+/* kprobe_table can be accessed by
+ * - Normal hlist traversal and RCU add/del under kprobe_mutex is held.
+ * Or
+ * - RCU hlist traversal under disabling preempt (breakpoint handlers)
+ */
static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
@@ -326,7 +331,8 @@ struct kprobe *get_kprobe(void *addr)
struct kprobe *p;
head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
- hlist_for_each_entry_rcu(p, head, hlist) {
+ hlist_for_each_entry_rcu(p, head, hlist,
+ lockdep_is_held(&kprobe_mutex)) {
if (p->addr == addr)
return p;
}
@@ -586,11 +592,12 @@ static void kprobe_optimizer(struct work_struct *work)
mutex_unlock(&module_mutex);
mutex_unlock(&text_mutex);
cpus_read_unlock();
- mutex_unlock(&kprobe_mutex);
/* Step 5: Kick optimizer again if needed */
if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
kick_kprobe_optimizer();
+
+ mutex_unlock(&kprobe_mutex);
}
/* Wait for completing optimization and unoptimization */
@@ -668,8 +675,6 @@ static void force_unoptimize_kprobe(struct optimized_kprobe *op)
lockdep_assert_cpus_held();
arch_unoptimize_kprobe(op);
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
- if (kprobe_disabled(&op->kp))
- arch_disarm_kprobe(&op->kp);
}
/* Unoptimize a kprobe if p is optimized */
@@ -849,7 +854,7 @@ static void optimize_all_kprobes(void)
kprobes_allow_optimization = true;
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
- hlist_for_each_entry_rcu(p, head, hlist)
+ hlist_for_each_entry(p, head, hlist)
if (!kprobe_disabled(p))
optimize_kprobe(p);
}
@@ -876,7 +881,7 @@ static void unoptimize_all_kprobes(void)
kprobes_allow_optimization = false;
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
- hlist_for_each_entry_rcu(p, head, hlist) {
+ hlist_for_each_entry(p, head, hlist) {
if (!kprobe_disabled(p))
unoptimize_kprobe(p, false);
}
@@ -1236,6 +1241,26 @@ __releases(hlist_lock)
}
NOKPROBE_SYMBOL(kretprobe_table_unlock);
+struct kprobe kprobe_busy = {
+ .addr = (void *) get_kprobe,
+};
+
+void kprobe_busy_begin(void)
+{
+ struct kprobe_ctlblk *kcb;
+
+ preempt_disable();
+ __this_cpu_write(current_kprobe, &kprobe_busy);
+ kcb = get_kprobe_ctlblk();
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+}
+
+void kprobe_busy_end(void)
+{
+ __this_cpu_write(current_kprobe, NULL);
+ preempt_enable();
+}
+
/*
* This function is called from finish_task_switch when task tk becomes dead,
* so that we can recycle any function-return probe instances associated
@@ -1253,6 +1278,8 @@ void kprobe_flush_task(struct task_struct *tk)
/* Early boot. kretprobe_table_locks not yet initialized. */
return;
+ kprobe_busy_begin();
+
INIT_HLIST_HEAD(&empty_rp);
hash = hash_ptr(tk, KPROBE_HASH_BITS);
head = &kretprobe_inst_table[hash];
@@ -1266,6 +1293,8 @@ void kprobe_flush_task(struct task_struct *tk)
hlist_del(&ri->hlist);
kfree(ri);
}
+
+ kprobe_busy_end();
}
NOKPROBE_SYMBOL(kprobe_flush_task);
@@ -1499,12 +1528,14 @@ static struct kprobe *__get_valid_kprobe(struct kprobe *p)
{
struct kprobe *ap, *list_p;
+ lockdep_assert_held(&kprobe_mutex);
+
ap = get_kprobe(p->addr);
if (unlikely(!ap))
return NULL;
if (p != ap) {
- list_for_each_entry_rcu(list_p, &ap->list, list)
+ list_for_each_entry(list_p, &ap->list, list)
if (list_p == p)
/* kprobe p is a valid probe */
goto valid;
@@ -1669,7 +1700,9 @@ static int aggr_kprobe_disabled(struct kprobe *ap)
{
struct kprobe *kp;
- list_for_each_entry_rcu(kp, &ap->list, list)
+ lockdep_assert_held(&kprobe_mutex);
+
+ list_for_each_entry(kp, &ap->list, list)
if (!kprobe_disabled(kp))
/*
* There is an active probe on the list.
@@ -1748,7 +1781,7 @@ static int __unregister_kprobe_top(struct kprobe *p)
else {
/* If disabling probe has special handlers, update aggrprobe */
if (p->post_handler && !kprobe_gone(p)) {
- list_for_each_entry_rcu(list_p, &ap->list, list) {
+ list_for_each_entry(list_p, &ap->list, list) {
if ((list_p != p) && (list_p->post_handler))
goto noclean;
}
@@ -2062,13 +2095,15 @@ static void kill_kprobe(struct kprobe *p)
{
struct kprobe *kp;
+ lockdep_assert_held(&kprobe_mutex);
+
p->flags |= KPROBE_FLAG_GONE;
if (kprobe_aggrprobe(p)) {
/*
* If this is an aggr_kprobe, we have to list all the
* chained probes and mark them GONE.
*/
- list_for_each_entry_rcu(kp, &p->list, list)
+ list_for_each_entry(kp, &p->list, list)
kp->flags |= KPROBE_FLAG_GONE;
p->post_handler = NULL;
kill_optimized_kprobe(p);
@@ -2312,7 +2347,7 @@ static int kprobes_module_callback(struct notifier_block *nb,
mutex_lock(&kprobe_mutex);
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
- hlist_for_each_entry_rcu(p, head, hlist)
+ hlist_for_each_entry(p, head, hlist)
if (within_module_init((unsigned long)p->addr, mod) ||
(checkcore &&
within_module_core((unsigned long)p->addr, mod))) {
@@ -2550,7 +2585,7 @@ static int arm_all_kprobes(void)
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
/* Arm all kprobes on a best-effort basis */
- hlist_for_each_entry_rcu(p, head, hlist) {
+ hlist_for_each_entry(p, head, hlist) {
if (!kprobe_disabled(p)) {
err = arm_kprobe(p);
if (err) {
@@ -2593,7 +2628,7 @@ static int disarm_all_kprobes(void)
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
/* Disarm all kprobes on a best-effort basis */
- hlist_for_each_entry_rcu(p, head, hlist) {
+ hlist_for_each_entry(p, head, hlist) {
if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) {
err = disarm_kprobe(p, false);
if (err) {
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index c163c3531faf..1903b80db6eb 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2260,7 +2260,7 @@ ftrace_find_tramp_ops_next(struct dyn_ftrace *rec,
if (hash_contains_ip(ip, op->func_hash))
return op;
- }
+ }
return NULL;
}
@@ -3599,7 +3599,7 @@ static int t_show(struct seq_file *m, void *v)
if (direct)
seq_printf(m, "\n\tdirect-->%pS", (void *)direct);
}
- }
+ }
seq_putc(m, '\n');
@@ -7151,6 +7151,10 @@ static int pid_open(struct inode *inode, struct file *file, int type)
case TRACE_NO_PIDS:
seq_ops = &ftrace_no_pid_sops;
break;
+ default:
+ trace_array_put(tr);
+ WARN_ON_ONCE(1);
+ return -EINVAL;
}
ret = seq_open(file, seq_ops);
@@ -7229,6 +7233,10 @@ pid_write(struct file *filp, const char __user *ubuf,
other_pids = rcu_dereference_protected(tr->function_pids,
lockdep_is_held(&ftrace_lock));
break;
+ default:
+ ret = -EINVAL;
+ WARN_ON_ONCE(1);
+ goto out;
}
ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ec44b0e2a19c..bb62269724d5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3570,7 +3570,6 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
void tracing_iter_reset(struct trace_iterator *iter, int cpu)
{
- struct ring_buffer_event *event;
struct ring_buffer_iter *buf_iter;
unsigned long entries = 0;
u64 ts;
@@ -3588,7 +3587,7 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu)
* that a reset never took place on a cpu. This is evident
* by the timestamp being before the start of the buffer.
*/
- while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
+ while (ring_buffer_iter_peek(buf_iter, &ts)) {
if (ts >= iter->array_buffer->time_start)
break;
entries++;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index def769df5bf1..13db4000af3f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -61,6 +61,9 @@ enum trace_type {
#undef __field_desc
#define __field_desc(type, container, item)
+#undef __field_packed
+#define __field_packed(type, container, item)
+
#undef __array
#define __array(type, item, size) type item[size];
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index a523da0dae0a..18c4a58aff79 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -78,8 +78,8 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
F_STRUCT(
__field_struct( struct ftrace_graph_ent, graph_ent )
- __field_desc( unsigned long, graph_ent, func )
- __field_desc( int, graph_ent, depth )
+ __field_packed( unsigned long, graph_ent, func )
+ __field_packed( int, graph_ent, depth )
),
F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth)
@@ -92,11 +92,11 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
F_STRUCT(
__field_struct( struct ftrace_graph_ret, ret )
- __field_desc( unsigned long, ret, func )
- __field_desc( unsigned long, ret, overrun )
- __field_desc( unsigned long long, ret, calltime)
- __field_desc( unsigned long long, ret, rettime )
- __field_desc( int, ret, depth )
+ __field_packed( unsigned long, ret, func )
+ __field_packed( unsigned long, ret, overrun )
+ __field_packed( unsigned long long, ret, calltime)
+ __field_packed( unsigned long long, ret, rettime )
+ __field_packed( int, ret, depth )
),
F_printk("<-- %ps (%d) (start: %llx end: %llx) over: %d",
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 77ce5a3b6773..70d3d0a09053 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -45,6 +45,9 @@ static int ftrace_event_register(struct trace_event_call *call,
#undef __field_desc
#define __field_desc(type, container, item) type item;
+#undef __field_packed
+#define __field_packed(type, container, item) type item;
+
#undef __array
#define __array(type, item, size) type item[size];
@@ -85,6 +88,13 @@ static void __always_unused ____ftrace_check_##name(void) \
.size = sizeof(_type), .align = __alignof__(_type), \
is_signed_type(_type), .filter_type = _filter_type },
+
+#undef __field_ext_packed
+#define __field_ext_packed(_type, _item, _filter_type) { \
+ .type = #_type, .name = #_item, \
+ .size = sizeof(_type), .align = 1, \
+ is_signed_type(_type), .filter_type = _filter_type },
+
#undef __field
#define __field(_type, _item) __field_ext(_type, _item, FILTER_OTHER)
@@ -94,6 +104,9 @@ static void __always_unused ____ftrace_check_##name(void) \
#undef __field_desc
#define __field_desc(_type, _container, _item) __field_ext(_type, _item, FILTER_OTHER)
+#undef __field_packed
+#define __field_packed(_type, _container, _item) __field_ext_packed(_type, _item, FILTER_OTHER)
+
#undef __array
#define __array(_type, _item, _len) { \
.type = #_type"["__stringify(_len)"]", .name = #_item, \
@@ -129,6 +142,9 @@ static struct trace_event_fields ftrace_event_fields_##name[] = { \
#undef __field_desc
#define __field_desc(type, container, item)
+#undef __field_packed
+#define __field_packed(type, container, item)
+
#undef __array
#define __array(type, item, len)
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 8a4c8d5c2c98..dd4dff71d89a 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -42,7 +42,7 @@ static int allocate_ftrace_ops(struct trace_array *tr)
if (!ops)
return -ENOMEM;
- /* Currently only the non stack verision is supported */
+ /* Currently only the non stack version is supported */
ops->func = function_trace_call;
ops->flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_PID;
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index b8a928e925c7..d2867ccc6aca 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -639,8 +639,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
ret = -EINVAL;
goto fail;
}
- if ((code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM) ||
- parg->count) {
+ if ((code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM ||
+ code->op == FETCH_OP_DATA) || parg->count) {
/*
* IMM, DATA and COMM is pointing actual address, those
* must be kept, and if parg->count != 0, this is an