summaryrefslogtreecommitdiff
path: root/kernel/trace/trace_events.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace/trace_events.c')
-rw-r--r--kernel/trace/trace_events.c614
1 files changed, 469 insertions, 145 deletions
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 513de9ceb80e..c46e623e7e0d 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -360,7 +360,7 @@ static bool process_string(const char *fmt, int len, struct trace_event_call *ca
/* Anything else, this isn't a function */
break;
}
- /* A function could be wrapped in parethesis, try the next one */
+ /* A function could be wrapped in parenthesis, try the next one */
s = r + 1;
} while (s < e);
@@ -400,6 +400,20 @@ static bool process_string(const char *fmt, int len, struct trace_event_call *ca
return true;
}
+static void handle_dereference_arg(const char *arg_str, u64 string_flags, int len,
+ u64 *dereference_flags, int arg,
+ struct trace_event_call *call)
+{
+ if (string_flags & (1ULL << arg)) {
+ if (process_string(arg_str, len, call))
+ *dereference_flags &= ~(1ULL << arg);
+ } else if (process_pointer(arg_str, len, call))
+ *dereference_flags &= ~(1ULL << arg);
+ else
+ pr_warn("TRACE EVENT ERROR: Bad dereference argument: '%.*s'\n",
+ len, arg_str);
+}
+
/*
* Examine the print fmt of the event looking for unsafe dereference
* pointers using %p* that could be recorded in the trace event and
@@ -470,6 +484,7 @@ static void test_event_printk(struct trace_event_call *call)
case '%':
continue;
case 'p':
+ do_pointer:
/* Find dereferencing fields */
switch (fmt[i + 1]) {
case 'B': case 'R': case 'r':
@@ -498,6 +513,12 @@ static void test_event_printk(struct trace_event_call *call)
continue;
if (fmt[i + j] == '*') {
star = true;
+ /* Handle %*pbl case */
+ if (!j && fmt[i + 1] == 'p') {
+ arg++;
+ i++;
+ goto do_pointer;
+ }
continue;
}
if ((fmt[i + j] == 's')) {
@@ -546,7 +567,7 @@ static void test_event_printk(struct trace_event_call *call)
* If start_arg is zero, then this is the start of the
* first argument. The processing of the argument happens
* when the end of the argument is found, as it needs to
- * handle paranthesis and such.
+ * handle parenthesis and such.
*/
if (!start_arg) {
start_arg = i;
@@ -556,11 +577,9 @@ static void test_event_printk(struct trace_event_call *call)
}
if (dereference_flags & (1ULL << arg)) {
- if (string_flags & (1ULL << arg)) {
- if (process_string(fmt + start_arg, e - start_arg, call))
- dereference_flags &= ~(1ULL << arg);
- } else if (process_pointer(fmt + start_arg, e - start_arg, call))
- dereference_flags &= ~(1ULL << arg);
+ handle_dereference_arg(fmt + start_arg, string_flags,
+ e - start_arg,
+ &dereference_flags, arg, call);
}
start_arg = i;
@@ -571,11 +590,9 @@ static void test_event_printk(struct trace_event_call *call)
}
if (dereference_flags & (1ULL << arg)) {
- if (string_flags & (1ULL << arg)) {
- if (process_string(fmt + start_arg, i - start_arg, call))
- dereference_flags &= ~(1ULL << arg);
- } else if (process_pointer(fmt + start_arg, i - start_arg, call))
- dereference_flags &= ~(1ULL << arg);
+ handle_dereference_arg(fmt + start_arg, string_flags,
+ i - start_arg,
+ &dereference_flags, arg, call);
}
/*
@@ -615,7 +632,6 @@ EXPORT_SYMBOL_GPL(trace_event_raw_init);
bool trace_event_ignore_this_pid(struct trace_event_file *trace_file)
{
struct trace_array *tr = trace_file->tr;
- struct trace_array_cpu *data;
struct trace_pid_list *no_pid_list;
struct trace_pid_list *pid_list;
@@ -625,12 +641,30 @@ bool trace_event_ignore_this_pid(struct trace_event_file *trace_file)
if (!pid_list && !no_pid_list)
return false;
- data = this_cpu_ptr(tr->array_buffer.data);
-
- return data->ignore_pid;
+ /*
+ * This is recorded at every sched_switch for this task.
+ * Thus, even if the task migrates the ignore value will be the same.
+ */
+ return this_cpu_read(tr->array_buffer.data->ignore_pid) != 0;
}
EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid);
+/**
+ * trace_event_buffer_reserve - reserve space on the ring buffer for an event
+ * @fbuffer: information about how to save the event
+ * @trace_file: the instance file descriptor for the event
+ * @len: The length of the event
+ *
+ * The @fbuffer has information about the ring buffer and data will
+ * be added to it to be used by the call to trace_event_buffer_commit().
+ * The @trace_file is the desrciptor with information about the status
+ * of the given event for a specific trace_array instance.
+ * The @len is the length of data to save for the event.
+ *
+ * Returns a pointer to the data on the ring buffer or NULL if the
+ * event was not reserved (event was filtered, too big, or the buffer
+ * simply was disabled for write).
+ */
void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
struct trace_event_file *trace_file,
unsigned long len)
@@ -682,6 +716,8 @@ int trace_event_reg(struct trace_event_call *call,
#ifdef CONFIG_PERF_EVENTS
case TRACE_REG_PERF_REGISTER:
+ if (!call->class->perf_probe)
+ return -ENODEV;
return tracepoint_probe_register(call->tp,
call->class->perf_probe,
call);
@@ -750,6 +786,7 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
{
struct trace_event_call *call = file->event_call;
struct trace_array *tr = file->tr;
+ bool soft_mode = atomic_read(&file->sm_ref) != 0;
int ret = 0;
int disable;
@@ -764,19 +801,19 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
* is set we do not want the event to be enabled before we
* clear the bit.
*
- * When soft_disable is not set but the SOFT_MODE flag is,
+ * When soft_disable is not set but the soft_mode is,
* we do nothing. Do not disable the tracepoint, otherwise
- * "soft enable"s (clearing the SOFT_DISABLED bit) wont work.
+ * "soft enable"s (clearing the SOFT_DISABLED bit) won't work.
*/
if (soft_disable) {
if (atomic_dec_return(&file->sm_ref) > 0)
break;
disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED;
- clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
+ soft_mode = false;
/* Disable use of trace_buffered_event */
trace_buffered_event_disable();
} else
- disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE);
+ disable = !soft_mode;
if (disable && (file->flags & EVENT_FILE_FL_ENABLED)) {
clear_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
@@ -790,10 +827,12 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
}
- call->class->reg(call, TRACE_REG_UNREGISTER, file);
+ ret = call->class->reg(call, TRACE_REG_UNREGISTER, file);
+
+ WARN_ON_ONCE(ret);
}
- /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */
- if (file->flags & EVENT_FILE_FL_SOFT_MODE)
+ /* If in soft mode, just set the SOFT_DISABLE_BIT, else clear it */
+ if (soft_mode)
set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
else
clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
@@ -803,16 +842,15 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
* When soft_disable is set and enable is set, we want to
* register the tracepoint for the event, but leave the event
* as is. That means, if the event was already enabled, we do
- * nothing (but set SOFT_MODE). If the event is disabled, we
- * set SOFT_DISABLED before enabling the event tracepoint, so
- * it still seems to be disabled.
+ * nothing. If the event is disabled, we set SOFT_DISABLED
+ * before enabling the event tracepoint, so it still seems
+ * to be disabled.
*/
if (!soft_disable)
clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
else {
if (atomic_inc_return(&file->sm_ref) > 1)
break;
- set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
/* Enable use of trace_buffered_event */
trace_buffered_event_enable();
}
@@ -820,17 +858,17 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
bool cmd = false, tgid = false;
- /* Keep the event disabled, when going to SOFT_MODE. */
+ /* Keep the event disabled, when going to soft mode. */
if (soft_disable)
set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
- if (tr->trace_flags & TRACE_ITER_RECORD_CMD) {
+ if (tr->trace_flags & TRACE_ITER(RECORD_CMD)) {
cmd = true;
tracing_start_cmdline_record();
set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
}
- if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
+ if (tr->trace_flags & TRACE_ITER(RECORD_TGID)) {
tgid = true;
tracing_start_tgid_record();
set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
@@ -938,7 +976,7 @@ static int cache_mod(struct trace_array *tr, const char *mod, int set,
if (!set)
return remove_cache_mod(tr, mod, match, system, event);
- event_mod = kzalloc(sizeof(*event_mod), GFP_KERNEL);
+ event_mod = kzalloc_obj(*event_mod);
if (!event_mod)
return -ENOMEM;
@@ -1001,6 +1039,7 @@ event_filter_pid_sched_process_exit(void *data, struct task_struct *task)
struct trace_pid_list *pid_list;
struct trace_array *tr = data;
+ guard(preempt)();
pid_list = rcu_dereference_raw(tr->filtered_pids);
trace_filter_add_remove_task(pid_list, NULL, task);
@@ -1016,6 +1055,7 @@ event_filter_pid_sched_process_fork(void *data,
struct trace_pid_list *pid_list;
struct trace_array *tr = data;
+ guard(preempt)();
pid_list = rcu_dereference_sched(tr->filtered_pids);
trace_filter_add_remove_task(pid_list, self, task);
@@ -1273,6 +1313,9 @@ static void remove_event_file_dir(struct trace_event_file *file)
free_event_filter(file->filter);
file->flags |= EVENT_FILE_FL_FREED;
event_file_put(file);
+
+ /* Wake up hist poll waiters to notice the EVENT_FILE_FL_FREED flag. */
+ hist_poll_wakeup();
}
/*
@@ -1358,6 +1401,9 @@ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
{
int ret;
+ if (trace_array_is_readonly(tr))
+ return -EACCES;
+
mutex_lock(&event_mutex);
ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set, mod);
mutex_unlock(&event_mutex);
@@ -1373,7 +1419,7 @@ int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
if (!tr)
return -ENOENT;
- /* Modules events can be appened with :mod:<module> */
+ /* Modules events can be appended with :mod:<module> */
mod = strstr(buf, ":mod:");
if (mod) {
*mod = '\0';
@@ -1607,12 +1653,11 @@ static void *s_start(struct seq_file *m, loff_t *pos)
struct set_event_iter *iter;
loff_t l;
- iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+ iter = kzalloc_obj(*iter);
+ mutex_lock(&event_mutex);
if (!iter)
return NULL;
- mutex_lock(&event_mutex);
-
iter->type = SET_EVENT_FILE;
iter->file = list_entry(&tr->events, struct trace_event_file, list);
@@ -1641,6 +1686,82 @@ static void t_stop(struct seq_file *m, void *p)
mutex_unlock(&event_mutex);
}
+static int get_call_len(struct trace_event_call *call)
+{
+ int len;
+
+ /* Get the length of "<system>:<event>" */
+ len = strlen(call->class->system) + 1;
+ len += strlen(trace_event_name(call));
+
+ /* Set the index to 32 bytes to separate event from data */
+ return len >= 32 ? 1 : 32 - len;
+}
+
+/**
+ * t_show_filters - seq_file callback to display active event filters
+ * @m: The seq_file interface for formatted output
+ * @v: The current trace_event_file being iterated
+ *
+ * Identifies and prints active filters for the current event file in the
+ * iteration. If a filter is applied to the current event and, if so,
+ * prints the system name, event name, and the filter string.
+ */
+static int t_show_filters(struct seq_file *m, void *v)
+{
+ struct trace_event_file *file = v;
+ struct trace_event_call *call = file->event_call;
+ struct event_filter *filter;
+ int len;
+
+ guard(rcu)();
+ filter = rcu_dereference(file->filter);
+ if (!filter || !filter->filter_string)
+ return 0;
+
+ len = get_call_len(call);
+
+ seq_printf(m, "%s:%s%*s%s\n", call->class->system,
+ trace_event_name(call), len, "", filter->filter_string);
+
+ return 0;
+}
+
+/**
+ * t_show_triggers - seq_file callback to display active event triggers
+ * @m: The seq_file interface for formatted output
+ * @v: The current trace_event_file being iterated
+ *
+ * Iterates through the trigger list of the current event file and prints
+ * each active trigger's configuration using its associated print
+ * operation.
+ */
+static int t_show_triggers(struct seq_file *m, void *v)
+{
+ struct trace_event_file *file = v;
+ struct trace_event_call *call = file->event_call;
+ struct event_trigger_data *data;
+ int len;
+
+ /*
+ * The event_mutex is held by t_start(), protecting the
+ * file->triggers list traversal.
+ */
+ if (list_empty(&file->triggers))
+ return 0;
+
+ len = get_call_len(call);
+
+ list_for_each_entry_rcu(data, &file->triggers, list) {
+ seq_printf(m, "%s:%s%*s", call->class->system,
+ trace_event_name(call), len, "");
+
+ data->cmd_ops->print(m, data);
+ }
+
+ return 0;
+}
+
#ifdef CONFIG_MODULES
static int s_show(struct seq_file *m, void *v)
{
@@ -1772,8 +1893,7 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
!(flags & EVENT_FILE_FL_SOFT_DISABLED))
strcpy(buf, "1");
- if (flags & EVENT_FILE_FL_SOFT_DISABLED ||
- flags & EVENT_FILE_FL_SOFT_MODE)
+ if (atomic_read(&file->sm_ref) != 0)
strcat(buf, "*");
strcat(buf, "\n");
@@ -1818,28 +1938,28 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
return cnt;
}
-static ssize_t
-system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
- loff_t *ppos)
+/*
+ * Returns:
+ * 0 : no events exist?
+ * 1 : all events are disabled
+ * 2 : all events are enabled
+ * 3 : some events are enabled and some are enabled
+ */
+int trace_events_enabled(struct trace_array *tr, const char *system)
{
- const char set_to_char[4] = { '?', '0', '1', 'X' };
- struct trace_subsystem_dir *dir = filp->private_data;
- struct event_subsystem *system = dir->subsystem;
struct trace_event_call *call;
struct trace_event_file *file;
- struct trace_array *tr = dir->tr;
- char buf[2];
int set = 0;
- int ret;
- mutex_lock(&event_mutex);
+ guard(mutex)(&event_mutex);
+
list_for_each_entry(file, &tr->events, list) {
call = file->event_call;
if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
!trace_event_name(call) || !call->class || !call->class->reg)
continue;
- if (system && strcmp(call->class->system, system->name) != 0)
+ if (system && strcmp(call->class->system, system) != 0)
continue;
/*
@@ -1855,7 +1975,23 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
if (set == 3)
break;
}
- mutex_unlock(&event_mutex);
+
+ return set;
+}
+
+static ssize_t
+system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
+ loff_t *ppos)
+{
+ const char set_to_char[4] = { '?', '0', '1', 'X' };
+ struct trace_subsystem_dir *dir = filp->private_data;
+ struct event_subsystem *system = dir->subsystem;
+ struct trace_array *tr = dir->tr;
+ char buf[2];
+ int set;
+ int ret;
+
+ set = trace_events_enabled(tr, system ? system->name : NULL);
buf[0] = set_to_char[set];
buf[1] = '\n';
@@ -2051,12 +2187,12 @@ static int trace_format_open(struct inode *inode, struct file *file)
static ssize_t
event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
{
- int id = (long)event_file_data(filp);
+ /* id is directly in i_private and available for inode's lifetime. */
+ int id = (long)file_inode(filp)->i_private;
char buf[32];
int len;
- if (unlikely(!id))
- return -ENODEV;
+ WARN_ON(!id);
len = sprintf(buf, "%d\n", id);
@@ -2075,7 +2211,7 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
if (*ppos)
return 0;
- s = kmalloc(sizeof(*s), GFP_KERNEL);
+ s = kmalloc_obj(*s);
if (!s)
return -ENOMEM;
@@ -2114,12 +2250,8 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
mutex_lock(&event_mutex);
file = event_file_file(filp);
- if (file) {
- if (file->flags & EVENT_FILE_FL_FREED)
- err = -ENODEV;
- else
- err = apply_event_filter(file, buf);
- }
+ if (file)
+ err = apply_event_filter(file, buf);
mutex_unlock(&event_mutex);
kfree(buf);
@@ -2140,7 +2272,7 @@ static int subsystem_open(struct inode *inode, struct file *filp)
struct event_subsystem *system = NULL;
int ret;
- if (tracing_is_disabled())
+ if (unlikely(tracing_disabled))
return -ENODEV;
/* Make sure the system still exists */
@@ -2189,7 +2321,7 @@ static int system_tr_open(struct inode *inode, struct file *filp)
int ret;
/* Make a temporary dir that has no system but points to tr */
- dir = kzalloc(sizeof(*dir), GFP_KERNEL);
+ dir = kzalloc_obj(*dir);
if (!dir)
return -ENOMEM;
@@ -2235,7 +2367,7 @@ subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
if (*ppos)
return 0;
- s = kmalloc(sizeof(*s), GFP_KERNEL);
+ s = kmalloc_obj(*s);
if (!s)
return -ENOMEM;
@@ -2285,7 +2417,7 @@ show_header_page_file(struct file *filp, char __user *ubuf, size_t cnt, loff_t *
if (*ppos)
return 0;
- s = kmalloc(sizeof(*s), GFP_KERNEL);
+ s = kmalloc_obj(*s);
if (!s)
return -ENOMEM;
@@ -2309,7 +2441,7 @@ show_header_event_file(struct file *filp, char __user *ubuf, size_t cnt, loff_t
if (*ppos)
return 0;
- s = kmalloc(sizeof(*s), GFP_KERNEL);
+ s = kmalloc_obj(*s);
if (!s)
return -ENOMEM;
@@ -2453,6 +2585,8 @@ ftrace_event_npid_write(struct file *filp, const char __user *ubuf,
static int ftrace_event_avail_open(struct inode *inode, struct file *file);
static int ftrace_event_set_open(struct inode *inode, struct file *file);
+static int ftrace_event_show_filters_open(struct inode *inode, struct file *file);
+static int ftrace_event_show_triggers_open(struct inode *inode, struct file *file);
static int ftrace_event_set_pid_open(struct inode *inode, struct file *file);
static int ftrace_event_set_npid_open(struct inode *inode, struct file *file);
static int ftrace_event_release(struct inode *inode, struct file *file);
@@ -2471,6 +2605,20 @@ static const struct seq_operations show_set_event_seq_ops = {
.stop = s_stop,
};
+static const struct seq_operations show_show_event_filters_seq_ops = {
+ .start = t_start,
+ .next = t_next,
+ .show = t_show_filters,
+ .stop = t_stop,
+};
+
+static const struct seq_operations show_show_event_triggers_seq_ops = {
+ .start = t_start,
+ .next = t_next,
+ .show = t_show_triggers,
+ .stop = t_stop,
+};
+
static const struct seq_operations show_set_pid_seq_ops = {
.start = p_start,
.next = p_next,
@@ -2500,6 +2648,20 @@ static const struct file_operations ftrace_set_event_fops = {
.release = ftrace_event_release,
};
+static const struct file_operations ftrace_show_event_filters_fops = {
+ .open = ftrace_event_show_filters_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static const struct file_operations ftrace_show_event_triggers_fops = {
+ .open = ftrace_event_show_triggers_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
static const struct file_operations ftrace_set_event_pid_fops = {
.open = ftrace_event_set_pid_open,
.read = seq_read,
@@ -2644,6 +2806,34 @@ ftrace_event_set_open(struct inode *inode, struct file *file)
return ret;
}
+/**
+ * ftrace_event_show_filters_open - open interface for set_event_filters
+ * @inode: The inode of the file
+ * @file: The file being opened
+ *
+ * Connects the set_event_filters file to the sequence operations
+ * required to iterate over and display active event filters.
+ */
+static int
+ftrace_event_show_filters_open(struct inode *inode, struct file *file)
+{
+ return ftrace_event_open(inode, file, &show_show_event_filters_seq_ops);
+}
+
+/**
+ * ftrace_event_show_triggers_open - open interface for show_event_triggers
+ * @inode: The inode of the file
+ * @file: The file being opened
+ *
+ * Connects the show_event_triggers file to the sequence operations
+ * required to iterate over and display active event triggers.
+ */
+static int
+ftrace_event_show_triggers_open(struct inode *inode, struct file *file)
+{
+ return ftrace_event_open(inode, file, &show_show_event_triggers_seq_ops);
+}
+
static int
ftrace_event_set_pid_open(struct inode *inode, struct file *file)
{
@@ -2692,7 +2882,7 @@ create_new_subsystem(const char *name)
struct event_subsystem *system;
/* need to create new entry */
- system = kmalloc(sizeof(*system), GFP_KERNEL);
+ system = kmalloc_obj(*system);
if (!system)
return NULL;
@@ -2703,7 +2893,7 @@ create_new_subsystem(const char *name)
if (!system->name)
goto out_free;
- system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
+ system->filter = kzalloc_obj(struct event_filter);
if (!system->filter)
goto out_free;
@@ -2771,7 +2961,7 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
}
}
- dir = kmalloc(sizeof(*dir), GFP_KERNEL);
+ dir = kmalloc_obj(*dir);
if (!dir)
goto out_fail;
@@ -2782,8 +2972,8 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
} else
__get_system(system);
- /* ftrace only has directories no files */
- if (strcmp(name, "ftrace") == 0)
+ /* ftrace only has directories no files, readonly instance too. */
+ if (strcmp(name, "ftrace") == 0 || trace_array_is_readonly(tr))
nr_entries = 0;
else
nr_entries = ARRAY_SIZE(system_entries);
@@ -2948,28 +3138,30 @@ event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file)
int ret;
static struct eventfs_entry event_entries[] = {
{
- .name = "enable",
+ .name = "format",
.callback = event_callback,
- .release = event_release,
},
+#ifdef CONFIG_PERF_EVENTS
{
- .name = "filter",
+ .name = "id",
.callback = event_callback,
},
+#endif
+#define NR_RO_EVENT_ENTRIES (1 + IS_ENABLED(CONFIG_PERF_EVENTS))
+/* Readonly files must be above this line and counted by NR_RO_EVENT_ENTRIES. */
{
- .name = "trigger",
+ .name = "enable",
.callback = event_callback,
+ .release = event_release,
},
{
- .name = "format",
+ .name = "filter",
.callback = event_callback,
},
-#ifdef CONFIG_PERF_EVENTS
{
- .name = "id",
+ .name = "trigger",
.callback = event_callback,
},
-#endif
#ifdef CONFIG_HIST_TRIGGERS
{
.name = "hist",
@@ -3002,7 +3194,10 @@ event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file)
if (!e_events)
return -ENOMEM;
- nr_entries = ARRAY_SIZE(event_entries);
+ if (trace_array_is_readonly(tr))
+ nr_entries = NR_RO_EVENT_ENTRIES;
+ else
+ nr_entries = ARRAY_SIZE(event_entries);
name = trace_event_name(call);
ei = eventfs_create_dir(name, e_events, event_entries, nr_entries, file);
@@ -3100,7 +3295,10 @@ __register_event(struct trace_event_call *call, struct module *mod)
if (ret < 0)
return ret;
+ down_write(&trace_event_sem);
list_add(&call->list, &ftrace_events);
+ up_write(&trace_event_sem);
+
if (call->flags & TRACE_EVENT_FL_DYNAMIC)
atomic_set(&call->refcnt, 0);
else
@@ -3211,7 +3409,7 @@ static void add_str_to_module(struct module *module, char *str)
{
struct module_string *modstr;
- modstr = kmalloc(sizeof(*modstr), GFP_KERNEL);
+ modstr = kmalloc_obj(*modstr);
/*
* If we failed to allocate memory here, then we'll just
@@ -3228,43 +3426,120 @@ static void add_str_to_module(struct module *module, char *str)
list_add(&modstr->next, &module_strings);
}
+#define ATTRIBUTE_STR "__attribute__("
+#define ATTRIBUTE_STR_LEN (sizeof(ATTRIBUTE_STR) - 1)
+
+/* Remove all __attribute__() from @type. Return allocated string or @type. */
+static char *sanitize_field_type(const char *type)
+{
+ char *attr, *tmp, *next, *ret = (char *)type;
+ int depth;
+
+ next = (char *)type;
+ while ((attr = strstr(next, ATTRIBUTE_STR))) {
+ /* Retry if "__attribute__(" is a part of another word. */
+ if (attr != next && !isspace(attr[-1])) {
+ next = attr + ATTRIBUTE_STR_LEN;
+ continue;
+ }
+
+ if (ret == type) {
+ ret = kstrdup(type, GFP_KERNEL);
+ if (WARN_ON_ONCE(!ret))
+ return NULL;
+ attr = ret + (attr - type);
+ }
+
+ /* the ATTRIBUTE_STR already has the first '(' */
+ depth = 1;
+ next = attr + ATTRIBUTE_STR_LEN;
+ do {
+ tmp = strpbrk(next, "()");
+ /* There is unbalanced parentheses */
+ if (WARN_ON_ONCE(!tmp)) {
+ kfree(ret);
+ return (char *)type;
+ }
+
+ if (*tmp == '(')
+ depth++;
+ else
+ depth--;
+ next = tmp + 1;
+ } while (depth > 0);
+ next = skip_spaces(next);
+ strcpy(attr, next);
+ next = attr;
+ }
+ return ret;
+}
+
+static char *find_replacable_eval(const char *type, const char *eval_string,
+ int len)
+{
+ char *ptr;
+
+ if (!eval_string)
+ return NULL;
+
+ ptr = strchr(type, '[');
+ if (!ptr)
+ return NULL;
+ ptr++;
+
+ if (!isalpha(*ptr) && *ptr != '_')
+ return NULL;
+
+ if (strncmp(eval_string, ptr, len) != 0)
+ return NULL;
+
+ return ptr;
+}
+
static void update_event_fields(struct trace_event_call *call,
struct trace_eval_map *map)
{
struct ftrace_event_field *field;
+ const char *eval_string = NULL;
struct list_head *head;
+ int len = 0;
char *ptr;
char *str;
- int len = strlen(map->eval_string);
/* Dynamic events should never have field maps */
- if (WARN_ON_ONCE(call->flags & TRACE_EVENT_FL_DYNAMIC))
+ if (call->flags & TRACE_EVENT_FL_DYNAMIC)
return;
+ if (map) {
+ eval_string = map->eval_string;
+ len = strlen(map->eval_string);
+ }
+
head = trace_get_fields(call);
list_for_each_entry(field, head, link) {
- ptr = strchr(field->type, '[');
- if (!ptr)
- continue;
- ptr++;
-
- if (!isalpha(*ptr) && *ptr != '_')
- continue;
+ str = sanitize_field_type(field->type);
+ if (!str)
+ return;
- if (strncmp(map->eval_string, ptr, len) != 0)
- continue;
+ ptr = find_replacable_eval(str, eval_string, len);
+ if (ptr) {
+ if (str == field->type) {
+ str = kstrdup(field->type, GFP_KERNEL);
+ if (WARN_ON_ONCE(!str))
+ return;
+ ptr = str + (ptr - field->type);
+ }
- str = kstrdup(field->type, GFP_KERNEL);
- if (WARN_ON_ONCE(!str))
- return;
- ptr = str + (ptr - field->type);
- ptr = eval_replace(ptr, map, len);
- /* enum/sizeof string smaller than value */
- if (WARN_ON_ONCE(!ptr)) {
- kfree(str);
- continue;
+ ptr = eval_replace(ptr, map, len);
+ /* enum/sizeof string smaller than value */
+ if (WARN_ON_ONCE(!ptr)) {
+ kfree(str);
+ continue;
+ }
}
+ if (str == field->type)
+ continue;
/*
* If the event is part of a module, then we need to free the string
* when the module is removed. Otherwise, it will stay allocated
@@ -3274,14 +3549,18 @@ static void update_event_fields(struct trace_event_call *call,
add_str_to_module(call->module, str);
field->type = str;
+ if (field->filter_type == FILTER_OTHER)
+ field->filter_type = filter_assign_type(field->type);
}
}
-void trace_event_eval_update(struct trace_eval_map **map, int len)
+/* Update all events for replacing eval and sanitizing */
+void trace_event_update_all(struct trace_eval_map **map, int len)
{
struct trace_event_call *call, *p;
const char *last_system = NULL;
bool first = false;
+ bool updated;
int last_i;
int i;
@@ -3294,6 +3573,7 @@ void trace_event_eval_update(struct trace_eval_map **map, int len)
last_system = call->class->system;
}
+ updated = false;
/*
* Since calls are grouped by systems, the likelihood that the
* next call in the iteration belongs to the same system as the
@@ -3313,8 +3593,12 @@ void trace_event_eval_update(struct trace_eval_map **map, int len)
}
update_event_printk(call, map[i]);
update_event_fields(call, map[i]);
+ updated = true;
}
}
+ /* If not updated yet, update field for sanitizing. */
+ if (!updated)
+ update_event_fields(call, NULL);
cond_resched();
}
up_write(&trace_event_sem);
@@ -3399,20 +3683,27 @@ static struct boot_triggers {
} bootup_triggers[MAX_BOOT_TRIGGERS];
static char bootup_trigger_buf[COMMAND_LINE_SIZE];
+static int boot_trigger_buf_len;
static int nr_boot_triggers;
static __init int setup_trace_triggers(char *str)
{
char *trigger;
char *buf;
+ int len = boot_trigger_buf_len;
int i;
- strscpy(bootup_trigger_buf, str, COMMAND_LINE_SIZE);
+ if (len >= COMMAND_LINE_SIZE)
+ return 1;
+
+ strscpy(bootup_trigger_buf + len, str, COMMAND_LINE_SIZE - len);
trace_set_ring_buffer_expanded(NULL);
disable_tracing_selftest("running event triggers");
- buf = bootup_trigger_buf;
- for (i = 0; i < MAX_BOOT_TRIGGERS; i++) {
+ buf = bootup_trigger_buf + len;
+ boot_trigger_buf_len += strlen(buf) + 1;
+
+ for (i = nr_boot_triggers; i < MAX_BOOT_TRIGGERS; i++) {
trigger = strsep(&buf, ",");
if (!trigger)
break;
@@ -3548,7 +3839,7 @@ static int probe_remove_event_call(struct trace_event_call *call)
continue;
/*
* We can't rely on ftrace_event_enable_disable(enable => 0)
- * we are going to do, EVENT_FILE_FL_SOFT_MODE can suppress
+ * we are going to do, soft mode can suppress
* TRACE_REG_UNREGISTER.
*/
if (file->flags & EVENT_FILE_FL_ENABLED)
@@ -3659,7 +3950,7 @@ static void trace_module_remove_events(struct module *mod)
if (call->module == mod)
__trace_remove_event_call(call);
}
- /* Check for any strings allocade for this module */
+ /* Check for any strings allocated for this module */
list_for_each_entry_safe(modstr, m, &module_strings, next) {
if (modstr->module != mod)
continue;
@@ -3714,6 +4005,8 @@ __trace_add_event_dirs(struct trace_array *tr)
struct trace_event_call *call;
int ret;
+ lockdep_assert_held(&trace_event_sem);
+
list_for_each_entry(call, &ftrace_events, list) {
ret = __trace_add_new_event(call, tr);
if (ret < 0)
@@ -3836,11 +4129,6 @@ void trace_put_event_file(struct trace_event_file *file)
EXPORT_SYMBOL_GPL(trace_put_event_file);
#ifdef CONFIG_DYNAMIC_FTRACE
-
-/* Avoid typos */
-#define ENABLE_EVENT_STR "enable_event"
-#define DISABLE_EVENT_STR "disable_event"
-
struct event_probe_data {
struct trace_event_file *file;
unsigned long count;
@@ -3961,7 +4249,7 @@ static int free_probe_data(void *data)
edata->ref--;
if (!edata->ref) {
- /* Remove the SOFT_MODE flag */
+ /* Remove soft mode */
__ftrace_event_enable_disable(edata->file, 0, 1);
trace_event_put_ref(edata->file->event_call);
kfree(edata);
@@ -4090,7 +4378,7 @@ event_enable_func(struct trace_array *tr, struct ftrace_hash *hash,
goto out_put;
ret = -ENOMEM;
- data = kzalloc(sizeof(*data), GFP_KERNEL);
+ data = kzalloc_obj(*data);
if (!data)
goto out_put;
@@ -4216,7 +4504,11 @@ static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
static __init int setup_trace_event(char *str)
{
- strscpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
+ if (bootup_event_buf[0] != '\0')
+ strlcat(bootup_event_buf, ",", COMMAND_LINE_SIZE);
+
+ strlcat(bootup_event_buf, str, COMMAND_LINE_SIZE);
+
trace_set_ring_buffer_expanded(NULL);
disable_tracing_selftest("running event tracing");
@@ -4255,25 +4547,44 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
int nr_entries;
static struct eventfs_entry events_entries[] = {
{
- .name = "enable",
+ .name = "header_page",
.callback = events_callback,
},
{
- .name = "header_page",
+ .name = "header_event",
.callback = events_callback,
},
+#define NR_RO_TOP_ENTRIES 2
+/* Readonly files must be above this line and counted by NR_RO_TOP_ENTRIES. */
{
- .name = "header_event",
+ .name = "enable",
.callback = events_callback,
},
};
- entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent,
- tr, &ftrace_set_event_fops);
- if (!entry)
- return -ENOMEM;
+ if (!trace_array_is_readonly(tr)) {
+ entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent,
+ tr, &ftrace_set_event_fops);
+ if (!entry)
+ return -ENOMEM;
- nr_entries = ARRAY_SIZE(events_entries);
+ /* There are not as crucial, just warn if they are not created */
+ trace_create_file("show_event_filters", TRACE_MODE_READ, parent, tr,
+ &ftrace_show_event_filters_fops);
+
+ trace_create_file("show_event_triggers", TRACE_MODE_READ, parent, tr,
+ &ftrace_show_event_triggers_fops);
+
+ trace_create_file("set_event_pid", TRACE_MODE_WRITE, parent,
+ tr, &ftrace_set_event_pid_fops);
+
+ trace_create_file("set_event_notrace_pid",
+ TRACE_MODE_WRITE, parent, tr,
+ &ftrace_set_event_notrace_pid_fops);
+ nr_entries = ARRAY_SIZE(events_entries);
+ } else {
+ nr_entries = NR_RO_TOP_ENTRIES;
+ }
e_events = eventfs_create_events_dir("events", parent, events_entries,
nr_entries, tr);
@@ -4282,15 +4593,6 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
return -ENOMEM;
}
- /* There are not as crucial, just warn if they are not created */
-
- trace_create_file("set_event_pid", TRACE_MODE_WRITE, parent,
- tr, &ftrace_set_event_pid_fops);
-
- trace_create_file("set_event_notrace_pid",
- TRACE_MODE_WRITE, parent, tr,
- &ftrace_set_event_notrace_pid_fops);
-
tr->event_dir = e_events;
return 0;
@@ -4387,26 +4689,22 @@ static __init int event_trace_memsetup(void)
return 0;
}
-__init void
-early_enable_events(struct trace_array *tr, char *buf, bool disable_first)
+/*
+ * Helper function to enable or disable a comma-separated list of events
+ * from the bootup buffer.
+ */
+static __init void __early_set_events(struct trace_array *tr, char *buf, bool enable)
{
char *token;
- int ret;
-
- while (true) {
- token = strsep(&buf, ",");
-
- if (!token)
- break;
+ while ((token = strsep(&buf, ","))) {
if (*token) {
- /* Restarting syscalls requires that we stop them first */
- if (disable_first)
+ if (enable) {
+ if (ftrace_set_clr_event(tr, token, 1))
+ pr_warn("Failed to enable trace event: %s\n", token);
+ } else {
ftrace_set_clr_event(tr, token, 0);
-
- ret = ftrace_set_clr_event(tr, token, 1);
- if (ret)
- pr_warn("Failed to enable trace event: %s\n", token);
+ }
}
/* Put back the comma to allow this to be called again */
@@ -4415,6 +4713,32 @@ early_enable_events(struct trace_array *tr, char *buf, bool disable_first)
}
}
+/**
+ * early_enable_events - enable events from the bootup buffer
+ * @tr: The trace array to enable the events in
+ * @buf: The buffer containing the comma separated list of events
+ * @disable_first: If true, disable all events in @buf before enabling them
+ *
+ * This function enables events from the bootup buffer. If @disable_first
+ * is true, it will first disable all events in the buffer before enabling
+ * them.
+ *
+ * For syscall events, which rely on a global refcount to register the
+ * SYSCALL_WORK_SYSCALL_TRACEPOINT flag (especially for pid 1), we must
+ * ensure the refcount hits zero before re-enabling them. A simple
+ * "disable then enable" per-event is not enough if multiple syscalls are
+ * used, as the refcount will stay above zero. Thus, we need a two-phase
+ * approach: disable all, then enable all.
+ */
+__init void
+early_enable_events(struct trace_array *tr, char *buf, bool disable_first)
+{
+ if (disable_first)
+ __early_set_events(tr, buf, false);
+
+ __early_set_events(tr, buf, true);
+}
+
static __init int event_trace_enable(void)
{
struct trace_array *tr = top_trace_array();