diff options
Diffstat (limited to 'kernel/trace/trace_events.c')
| -rw-r--r-- | kernel/trace/trace_events.c | 614 |
1 files changed, 469 insertions, 145 deletions
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 513de9ceb80e..c46e623e7e0d 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -360,7 +360,7 @@ static bool process_string(const char *fmt, int len, struct trace_event_call *ca /* Anything else, this isn't a function */ break; } - /* A function could be wrapped in parethesis, try the next one */ + /* A function could be wrapped in parenthesis, try the next one */ s = r + 1; } while (s < e); @@ -400,6 +400,20 @@ static bool process_string(const char *fmt, int len, struct trace_event_call *ca return true; } +static void handle_dereference_arg(const char *arg_str, u64 string_flags, int len, + u64 *dereference_flags, int arg, + struct trace_event_call *call) +{ + if (string_flags & (1ULL << arg)) { + if (process_string(arg_str, len, call)) + *dereference_flags &= ~(1ULL << arg); + } else if (process_pointer(arg_str, len, call)) + *dereference_flags &= ~(1ULL << arg); + else + pr_warn("TRACE EVENT ERROR: Bad dereference argument: '%.*s'\n", + len, arg_str); +} + /* * Examine the print fmt of the event looking for unsafe dereference * pointers using %p* that could be recorded in the trace event and @@ -470,6 +484,7 @@ static void test_event_printk(struct trace_event_call *call) case '%': continue; case 'p': + do_pointer: /* Find dereferencing fields */ switch (fmt[i + 1]) { case 'B': case 'R': case 'r': @@ -498,6 +513,12 @@ static void test_event_printk(struct trace_event_call *call) continue; if (fmt[i + j] == '*') { star = true; + /* Handle %*pbl case */ + if (!j && fmt[i + 1] == 'p') { + arg++; + i++; + goto do_pointer; + } continue; } if ((fmt[i + j] == 's')) { @@ -546,7 +567,7 @@ static void test_event_printk(struct trace_event_call *call) * If start_arg is zero, then this is the start of the * first argument. The processing of the argument happens * when the end of the argument is found, as it needs to - * handle paranthesis and such. + * handle parenthesis and such. */ if (!start_arg) { start_arg = i; @@ -556,11 +577,9 @@ static void test_event_printk(struct trace_event_call *call) } if (dereference_flags & (1ULL << arg)) { - if (string_flags & (1ULL << arg)) { - if (process_string(fmt + start_arg, e - start_arg, call)) - dereference_flags &= ~(1ULL << arg); - } else if (process_pointer(fmt + start_arg, e - start_arg, call)) - dereference_flags &= ~(1ULL << arg); + handle_dereference_arg(fmt + start_arg, string_flags, + e - start_arg, + &dereference_flags, arg, call); } start_arg = i; @@ -571,11 +590,9 @@ static void test_event_printk(struct trace_event_call *call) } if (dereference_flags & (1ULL << arg)) { - if (string_flags & (1ULL << arg)) { - if (process_string(fmt + start_arg, i - start_arg, call)) - dereference_flags &= ~(1ULL << arg); - } else if (process_pointer(fmt + start_arg, i - start_arg, call)) - dereference_flags &= ~(1ULL << arg); + handle_dereference_arg(fmt + start_arg, string_flags, + i - start_arg, + &dereference_flags, arg, call); } /* @@ -615,7 +632,6 @@ EXPORT_SYMBOL_GPL(trace_event_raw_init); bool trace_event_ignore_this_pid(struct trace_event_file *trace_file) { struct trace_array *tr = trace_file->tr; - struct trace_array_cpu *data; struct trace_pid_list *no_pid_list; struct trace_pid_list *pid_list; @@ -625,12 +641,30 @@ bool trace_event_ignore_this_pid(struct trace_event_file *trace_file) if (!pid_list && !no_pid_list) return false; - data = this_cpu_ptr(tr->array_buffer.data); - - return data->ignore_pid; + /* + * This is recorded at every sched_switch for this task. + * Thus, even if the task migrates the ignore value will be the same. + */ + return this_cpu_read(tr->array_buffer.data->ignore_pid) != 0; } EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid); +/** + * trace_event_buffer_reserve - reserve space on the ring buffer for an event + * @fbuffer: information about how to save the event + * @trace_file: the instance file descriptor for the event + * @len: The length of the event + * + * The @fbuffer has information about the ring buffer and data will + * be added to it to be used by the call to trace_event_buffer_commit(). + * The @trace_file is the desrciptor with information about the status + * of the given event for a specific trace_array instance. + * The @len is the length of data to save for the event. + * + * Returns a pointer to the data on the ring buffer or NULL if the + * event was not reserved (event was filtered, too big, or the buffer + * simply was disabled for write). + */ void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, struct trace_event_file *trace_file, unsigned long len) @@ -682,6 +716,8 @@ int trace_event_reg(struct trace_event_call *call, #ifdef CONFIG_PERF_EVENTS case TRACE_REG_PERF_REGISTER: + if (!call->class->perf_probe) + return -ENODEV; return tracepoint_probe_register(call->tp, call->class->perf_probe, call); @@ -750,6 +786,7 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file, { struct trace_event_call *call = file->event_call; struct trace_array *tr = file->tr; + bool soft_mode = atomic_read(&file->sm_ref) != 0; int ret = 0; int disable; @@ -764,19 +801,19 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file, * is set we do not want the event to be enabled before we * clear the bit. * - * When soft_disable is not set but the SOFT_MODE flag is, + * When soft_disable is not set but the soft_mode is, * we do nothing. Do not disable the tracepoint, otherwise - * "soft enable"s (clearing the SOFT_DISABLED bit) wont work. + * "soft enable"s (clearing the SOFT_DISABLED bit) won't work. */ if (soft_disable) { if (atomic_dec_return(&file->sm_ref) > 0) break; disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED; - clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags); + soft_mode = false; /* Disable use of trace_buffered_event */ trace_buffered_event_disable(); } else - disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE); + disable = !soft_mode; if (disable && (file->flags & EVENT_FILE_FL_ENABLED)) { clear_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags); @@ -790,10 +827,12 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file, clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags); } - call->class->reg(call, TRACE_REG_UNREGISTER, file); + ret = call->class->reg(call, TRACE_REG_UNREGISTER, file); + + WARN_ON_ONCE(ret); } - /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */ - if (file->flags & EVENT_FILE_FL_SOFT_MODE) + /* If in soft mode, just set the SOFT_DISABLE_BIT, else clear it */ + if (soft_mode) set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); else clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); @@ -803,16 +842,15 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file, * When soft_disable is set and enable is set, we want to * register the tracepoint for the event, but leave the event * as is. That means, if the event was already enabled, we do - * nothing (but set SOFT_MODE). If the event is disabled, we - * set SOFT_DISABLED before enabling the event tracepoint, so - * it still seems to be disabled. + * nothing. If the event is disabled, we set SOFT_DISABLED + * before enabling the event tracepoint, so it still seems + * to be disabled. */ if (!soft_disable) clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); else { if (atomic_inc_return(&file->sm_ref) > 1) break; - set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags); /* Enable use of trace_buffered_event */ trace_buffered_event_enable(); } @@ -820,17 +858,17 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file, if (!(file->flags & EVENT_FILE_FL_ENABLED)) { bool cmd = false, tgid = false; - /* Keep the event disabled, when going to SOFT_MODE. */ + /* Keep the event disabled, when going to soft mode. */ if (soft_disable) set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); - if (tr->trace_flags & TRACE_ITER_RECORD_CMD) { + if (tr->trace_flags & TRACE_ITER(RECORD_CMD)) { cmd = true; tracing_start_cmdline_record(); set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); } - if (tr->trace_flags & TRACE_ITER_RECORD_TGID) { + if (tr->trace_flags & TRACE_ITER(RECORD_TGID)) { tgid = true; tracing_start_tgid_record(); set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags); @@ -938,7 +976,7 @@ static int cache_mod(struct trace_array *tr, const char *mod, int set, if (!set) return remove_cache_mod(tr, mod, match, system, event); - event_mod = kzalloc(sizeof(*event_mod), GFP_KERNEL); + event_mod = kzalloc_obj(*event_mod); if (!event_mod) return -ENOMEM; @@ -1001,6 +1039,7 @@ event_filter_pid_sched_process_exit(void *data, struct task_struct *task) struct trace_pid_list *pid_list; struct trace_array *tr = data; + guard(preempt)(); pid_list = rcu_dereference_raw(tr->filtered_pids); trace_filter_add_remove_task(pid_list, NULL, task); @@ -1016,6 +1055,7 @@ event_filter_pid_sched_process_fork(void *data, struct trace_pid_list *pid_list; struct trace_array *tr = data; + guard(preempt)(); pid_list = rcu_dereference_sched(tr->filtered_pids); trace_filter_add_remove_task(pid_list, self, task); @@ -1273,6 +1313,9 @@ static void remove_event_file_dir(struct trace_event_file *file) free_event_filter(file->filter); file->flags |= EVENT_FILE_FL_FREED; event_file_put(file); + + /* Wake up hist poll waiters to notice the EVENT_FILE_FL_FREED flag. */ + hist_poll_wakeup(); } /* @@ -1358,6 +1401,9 @@ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match, { int ret; + if (trace_array_is_readonly(tr)) + return -EACCES; + mutex_lock(&event_mutex); ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set, mod); mutex_unlock(&event_mutex); @@ -1373,7 +1419,7 @@ int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set) if (!tr) return -ENOENT; - /* Modules events can be appened with :mod:<module> */ + /* Modules events can be appended with :mod:<module> */ mod = strstr(buf, ":mod:"); if (mod) { *mod = '\0'; @@ -1607,12 +1653,11 @@ static void *s_start(struct seq_file *m, loff_t *pos) struct set_event_iter *iter; loff_t l; - iter = kzalloc(sizeof(*iter), GFP_KERNEL); + iter = kzalloc_obj(*iter); + mutex_lock(&event_mutex); if (!iter) return NULL; - mutex_lock(&event_mutex); - iter->type = SET_EVENT_FILE; iter->file = list_entry(&tr->events, struct trace_event_file, list); @@ -1641,6 +1686,82 @@ static void t_stop(struct seq_file *m, void *p) mutex_unlock(&event_mutex); } +static int get_call_len(struct trace_event_call *call) +{ + int len; + + /* Get the length of "<system>:<event>" */ + len = strlen(call->class->system) + 1; + len += strlen(trace_event_name(call)); + + /* Set the index to 32 bytes to separate event from data */ + return len >= 32 ? 1 : 32 - len; +} + +/** + * t_show_filters - seq_file callback to display active event filters + * @m: The seq_file interface for formatted output + * @v: The current trace_event_file being iterated + * + * Identifies and prints active filters for the current event file in the + * iteration. If a filter is applied to the current event and, if so, + * prints the system name, event name, and the filter string. + */ +static int t_show_filters(struct seq_file *m, void *v) +{ + struct trace_event_file *file = v; + struct trace_event_call *call = file->event_call; + struct event_filter *filter; + int len; + + guard(rcu)(); + filter = rcu_dereference(file->filter); + if (!filter || !filter->filter_string) + return 0; + + len = get_call_len(call); + + seq_printf(m, "%s:%s%*s%s\n", call->class->system, + trace_event_name(call), len, "", filter->filter_string); + + return 0; +} + +/** + * t_show_triggers - seq_file callback to display active event triggers + * @m: The seq_file interface for formatted output + * @v: The current trace_event_file being iterated + * + * Iterates through the trigger list of the current event file and prints + * each active trigger's configuration using its associated print + * operation. + */ +static int t_show_triggers(struct seq_file *m, void *v) +{ + struct trace_event_file *file = v; + struct trace_event_call *call = file->event_call; + struct event_trigger_data *data; + int len; + + /* + * The event_mutex is held by t_start(), protecting the + * file->triggers list traversal. + */ + if (list_empty(&file->triggers)) + return 0; + + len = get_call_len(call); + + list_for_each_entry_rcu(data, &file->triggers, list) { + seq_printf(m, "%s:%s%*s", call->class->system, + trace_event_name(call), len, ""); + + data->cmd_ops->print(m, data); + } + + return 0; +} + #ifdef CONFIG_MODULES static int s_show(struct seq_file *m, void *v) { @@ -1772,8 +1893,7 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, !(flags & EVENT_FILE_FL_SOFT_DISABLED)) strcpy(buf, "1"); - if (flags & EVENT_FILE_FL_SOFT_DISABLED || - flags & EVENT_FILE_FL_SOFT_MODE) + if (atomic_read(&file->sm_ref) != 0) strcat(buf, "*"); strcat(buf, "\n"); @@ -1818,28 +1938,28 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } -static ssize_t -system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, - loff_t *ppos) +/* + * Returns: + * 0 : no events exist? + * 1 : all events are disabled + * 2 : all events are enabled + * 3 : some events are enabled and some are enabled + */ +int trace_events_enabled(struct trace_array *tr, const char *system) { - const char set_to_char[4] = { '?', '0', '1', 'X' }; - struct trace_subsystem_dir *dir = filp->private_data; - struct event_subsystem *system = dir->subsystem; struct trace_event_call *call; struct trace_event_file *file; - struct trace_array *tr = dir->tr; - char buf[2]; int set = 0; - int ret; - mutex_lock(&event_mutex); + guard(mutex)(&event_mutex); + list_for_each_entry(file, &tr->events, list) { call = file->event_call; if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) || !trace_event_name(call) || !call->class || !call->class->reg) continue; - if (system && strcmp(call->class->system, system->name) != 0) + if (system && strcmp(call->class->system, system) != 0) continue; /* @@ -1855,7 +1975,23 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, if (set == 3) break; } - mutex_unlock(&event_mutex); + + return set; +} + +static ssize_t +system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + const char set_to_char[4] = { '?', '0', '1', 'X' }; + struct trace_subsystem_dir *dir = filp->private_data; + struct event_subsystem *system = dir->subsystem; + struct trace_array *tr = dir->tr; + char buf[2]; + int set; + int ret; + + set = trace_events_enabled(tr, system ? system->name : NULL); buf[0] = set_to_char[set]; buf[1] = '\n'; @@ -2051,12 +2187,12 @@ static int trace_format_open(struct inode *inode, struct file *file) static ssize_t event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - int id = (long)event_file_data(filp); + /* id is directly in i_private and available for inode's lifetime. */ + int id = (long)file_inode(filp)->i_private; char buf[32]; int len; - if (unlikely(!id)) - return -ENODEV; + WARN_ON(!id); len = sprintf(buf, "%d\n", id); @@ -2075,7 +2211,7 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, if (*ppos) return 0; - s = kmalloc(sizeof(*s), GFP_KERNEL); + s = kmalloc_obj(*s); if (!s) return -ENOMEM; @@ -2114,12 +2250,8 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, mutex_lock(&event_mutex); file = event_file_file(filp); - if (file) { - if (file->flags & EVENT_FILE_FL_FREED) - err = -ENODEV; - else - err = apply_event_filter(file, buf); - } + if (file) + err = apply_event_filter(file, buf); mutex_unlock(&event_mutex); kfree(buf); @@ -2140,7 +2272,7 @@ static int subsystem_open(struct inode *inode, struct file *filp) struct event_subsystem *system = NULL; int ret; - if (tracing_is_disabled()) + if (unlikely(tracing_disabled)) return -ENODEV; /* Make sure the system still exists */ @@ -2189,7 +2321,7 @@ static int system_tr_open(struct inode *inode, struct file *filp) int ret; /* Make a temporary dir that has no system but points to tr */ - dir = kzalloc(sizeof(*dir), GFP_KERNEL); + dir = kzalloc_obj(*dir); if (!dir) return -ENOMEM; @@ -2235,7 +2367,7 @@ subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, if (*ppos) return 0; - s = kmalloc(sizeof(*s), GFP_KERNEL); + s = kmalloc_obj(*s); if (!s) return -ENOMEM; @@ -2285,7 +2417,7 @@ show_header_page_file(struct file *filp, char __user *ubuf, size_t cnt, loff_t * if (*ppos) return 0; - s = kmalloc(sizeof(*s), GFP_KERNEL); + s = kmalloc_obj(*s); if (!s) return -ENOMEM; @@ -2309,7 +2441,7 @@ show_header_event_file(struct file *filp, char __user *ubuf, size_t cnt, loff_t if (*ppos) return 0; - s = kmalloc(sizeof(*s), GFP_KERNEL); + s = kmalloc_obj(*s); if (!s) return -ENOMEM; @@ -2453,6 +2585,8 @@ ftrace_event_npid_write(struct file *filp, const char __user *ubuf, static int ftrace_event_avail_open(struct inode *inode, struct file *file); static int ftrace_event_set_open(struct inode *inode, struct file *file); +static int ftrace_event_show_filters_open(struct inode *inode, struct file *file); +static int ftrace_event_show_triggers_open(struct inode *inode, struct file *file); static int ftrace_event_set_pid_open(struct inode *inode, struct file *file); static int ftrace_event_set_npid_open(struct inode *inode, struct file *file); static int ftrace_event_release(struct inode *inode, struct file *file); @@ -2471,6 +2605,20 @@ static const struct seq_operations show_set_event_seq_ops = { .stop = s_stop, }; +static const struct seq_operations show_show_event_filters_seq_ops = { + .start = t_start, + .next = t_next, + .show = t_show_filters, + .stop = t_stop, +}; + +static const struct seq_operations show_show_event_triggers_seq_ops = { + .start = t_start, + .next = t_next, + .show = t_show_triggers, + .stop = t_stop, +}; + static const struct seq_operations show_set_pid_seq_ops = { .start = p_start, .next = p_next, @@ -2500,6 +2648,20 @@ static const struct file_operations ftrace_set_event_fops = { .release = ftrace_event_release, }; +static const struct file_operations ftrace_show_event_filters_fops = { + .open = ftrace_event_show_filters_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static const struct file_operations ftrace_show_event_triggers_fops = { + .open = ftrace_event_show_triggers_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + static const struct file_operations ftrace_set_event_pid_fops = { .open = ftrace_event_set_pid_open, .read = seq_read, @@ -2644,6 +2806,34 @@ ftrace_event_set_open(struct inode *inode, struct file *file) return ret; } +/** + * ftrace_event_show_filters_open - open interface for set_event_filters + * @inode: The inode of the file + * @file: The file being opened + * + * Connects the set_event_filters file to the sequence operations + * required to iterate over and display active event filters. + */ +static int +ftrace_event_show_filters_open(struct inode *inode, struct file *file) +{ + return ftrace_event_open(inode, file, &show_show_event_filters_seq_ops); +} + +/** + * ftrace_event_show_triggers_open - open interface for show_event_triggers + * @inode: The inode of the file + * @file: The file being opened + * + * Connects the show_event_triggers file to the sequence operations + * required to iterate over and display active event triggers. + */ +static int +ftrace_event_show_triggers_open(struct inode *inode, struct file *file) +{ + return ftrace_event_open(inode, file, &show_show_event_triggers_seq_ops); +} + static int ftrace_event_set_pid_open(struct inode *inode, struct file *file) { @@ -2692,7 +2882,7 @@ create_new_subsystem(const char *name) struct event_subsystem *system; /* need to create new entry */ - system = kmalloc(sizeof(*system), GFP_KERNEL); + system = kmalloc_obj(*system); if (!system) return NULL; @@ -2703,7 +2893,7 @@ create_new_subsystem(const char *name) if (!system->name) goto out_free; - system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL); + system->filter = kzalloc_obj(struct event_filter); if (!system->filter) goto out_free; @@ -2771,7 +2961,7 @@ event_subsystem_dir(struct trace_array *tr, const char *name, } } - dir = kmalloc(sizeof(*dir), GFP_KERNEL); + dir = kmalloc_obj(*dir); if (!dir) goto out_fail; @@ -2782,8 +2972,8 @@ event_subsystem_dir(struct trace_array *tr, const char *name, } else __get_system(system); - /* ftrace only has directories no files */ - if (strcmp(name, "ftrace") == 0) + /* ftrace only has directories no files, readonly instance too. */ + if (strcmp(name, "ftrace") == 0 || trace_array_is_readonly(tr)) nr_entries = 0; else nr_entries = ARRAY_SIZE(system_entries); @@ -2948,28 +3138,30 @@ event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file) int ret; static struct eventfs_entry event_entries[] = { { - .name = "enable", + .name = "format", .callback = event_callback, - .release = event_release, }, +#ifdef CONFIG_PERF_EVENTS { - .name = "filter", + .name = "id", .callback = event_callback, }, +#endif +#define NR_RO_EVENT_ENTRIES (1 + IS_ENABLED(CONFIG_PERF_EVENTS)) +/* Readonly files must be above this line and counted by NR_RO_EVENT_ENTRIES. */ { - .name = "trigger", + .name = "enable", .callback = event_callback, + .release = event_release, }, { - .name = "format", + .name = "filter", .callback = event_callback, }, -#ifdef CONFIG_PERF_EVENTS { - .name = "id", + .name = "trigger", .callback = event_callback, }, -#endif #ifdef CONFIG_HIST_TRIGGERS { .name = "hist", @@ -3002,7 +3194,10 @@ event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file) if (!e_events) return -ENOMEM; - nr_entries = ARRAY_SIZE(event_entries); + if (trace_array_is_readonly(tr)) + nr_entries = NR_RO_EVENT_ENTRIES; + else + nr_entries = ARRAY_SIZE(event_entries); name = trace_event_name(call); ei = eventfs_create_dir(name, e_events, event_entries, nr_entries, file); @@ -3100,7 +3295,10 @@ __register_event(struct trace_event_call *call, struct module *mod) if (ret < 0) return ret; + down_write(&trace_event_sem); list_add(&call->list, &ftrace_events); + up_write(&trace_event_sem); + if (call->flags & TRACE_EVENT_FL_DYNAMIC) atomic_set(&call->refcnt, 0); else @@ -3211,7 +3409,7 @@ static void add_str_to_module(struct module *module, char *str) { struct module_string *modstr; - modstr = kmalloc(sizeof(*modstr), GFP_KERNEL); + modstr = kmalloc_obj(*modstr); /* * If we failed to allocate memory here, then we'll just @@ -3228,43 +3426,120 @@ static void add_str_to_module(struct module *module, char *str) list_add(&modstr->next, &module_strings); } +#define ATTRIBUTE_STR "__attribute__(" +#define ATTRIBUTE_STR_LEN (sizeof(ATTRIBUTE_STR) - 1) + +/* Remove all __attribute__() from @type. Return allocated string or @type. */ +static char *sanitize_field_type(const char *type) +{ + char *attr, *tmp, *next, *ret = (char *)type; + int depth; + + next = (char *)type; + while ((attr = strstr(next, ATTRIBUTE_STR))) { + /* Retry if "__attribute__(" is a part of another word. */ + if (attr != next && !isspace(attr[-1])) { + next = attr + ATTRIBUTE_STR_LEN; + continue; + } + + if (ret == type) { + ret = kstrdup(type, GFP_KERNEL); + if (WARN_ON_ONCE(!ret)) + return NULL; + attr = ret + (attr - type); + } + + /* the ATTRIBUTE_STR already has the first '(' */ + depth = 1; + next = attr + ATTRIBUTE_STR_LEN; + do { + tmp = strpbrk(next, "()"); + /* There is unbalanced parentheses */ + if (WARN_ON_ONCE(!tmp)) { + kfree(ret); + return (char *)type; + } + + if (*tmp == '(') + depth++; + else + depth--; + next = tmp + 1; + } while (depth > 0); + next = skip_spaces(next); + strcpy(attr, next); + next = attr; + } + return ret; +} + +static char *find_replacable_eval(const char *type, const char *eval_string, + int len) +{ + char *ptr; + + if (!eval_string) + return NULL; + + ptr = strchr(type, '['); + if (!ptr) + return NULL; + ptr++; + + if (!isalpha(*ptr) && *ptr != '_') + return NULL; + + if (strncmp(eval_string, ptr, len) != 0) + return NULL; + + return ptr; +} + static void update_event_fields(struct trace_event_call *call, struct trace_eval_map *map) { struct ftrace_event_field *field; + const char *eval_string = NULL; struct list_head *head; + int len = 0; char *ptr; char *str; - int len = strlen(map->eval_string); /* Dynamic events should never have field maps */ - if (WARN_ON_ONCE(call->flags & TRACE_EVENT_FL_DYNAMIC)) + if (call->flags & TRACE_EVENT_FL_DYNAMIC) return; + if (map) { + eval_string = map->eval_string; + len = strlen(map->eval_string); + } + head = trace_get_fields(call); list_for_each_entry(field, head, link) { - ptr = strchr(field->type, '['); - if (!ptr) - continue; - ptr++; - - if (!isalpha(*ptr) && *ptr != '_') - continue; + str = sanitize_field_type(field->type); + if (!str) + return; - if (strncmp(map->eval_string, ptr, len) != 0) - continue; + ptr = find_replacable_eval(str, eval_string, len); + if (ptr) { + if (str == field->type) { + str = kstrdup(field->type, GFP_KERNEL); + if (WARN_ON_ONCE(!str)) + return; + ptr = str + (ptr - field->type); + } - str = kstrdup(field->type, GFP_KERNEL); - if (WARN_ON_ONCE(!str)) - return; - ptr = str + (ptr - field->type); - ptr = eval_replace(ptr, map, len); - /* enum/sizeof string smaller than value */ - if (WARN_ON_ONCE(!ptr)) { - kfree(str); - continue; + ptr = eval_replace(ptr, map, len); + /* enum/sizeof string smaller than value */ + if (WARN_ON_ONCE(!ptr)) { + kfree(str); + continue; + } } + if (str == field->type) + continue; /* * If the event is part of a module, then we need to free the string * when the module is removed. Otherwise, it will stay allocated @@ -3274,14 +3549,18 @@ static void update_event_fields(struct trace_event_call *call, add_str_to_module(call->module, str); field->type = str; + if (field->filter_type == FILTER_OTHER) + field->filter_type = filter_assign_type(field->type); } } -void trace_event_eval_update(struct trace_eval_map **map, int len) +/* Update all events for replacing eval and sanitizing */ +void trace_event_update_all(struct trace_eval_map **map, int len) { struct trace_event_call *call, *p; const char *last_system = NULL; bool first = false; + bool updated; int last_i; int i; @@ -3294,6 +3573,7 @@ void trace_event_eval_update(struct trace_eval_map **map, int len) last_system = call->class->system; } + updated = false; /* * Since calls are grouped by systems, the likelihood that the * next call in the iteration belongs to the same system as the @@ -3313,8 +3593,12 @@ void trace_event_eval_update(struct trace_eval_map **map, int len) } update_event_printk(call, map[i]); update_event_fields(call, map[i]); + updated = true; } } + /* If not updated yet, update field for sanitizing. */ + if (!updated) + update_event_fields(call, NULL); cond_resched(); } up_write(&trace_event_sem); @@ -3399,20 +3683,27 @@ static struct boot_triggers { } bootup_triggers[MAX_BOOT_TRIGGERS]; static char bootup_trigger_buf[COMMAND_LINE_SIZE]; +static int boot_trigger_buf_len; static int nr_boot_triggers; static __init int setup_trace_triggers(char *str) { char *trigger; char *buf; + int len = boot_trigger_buf_len; int i; - strscpy(bootup_trigger_buf, str, COMMAND_LINE_SIZE); + if (len >= COMMAND_LINE_SIZE) + return 1; + + strscpy(bootup_trigger_buf + len, str, COMMAND_LINE_SIZE - len); trace_set_ring_buffer_expanded(NULL); disable_tracing_selftest("running event triggers"); - buf = bootup_trigger_buf; - for (i = 0; i < MAX_BOOT_TRIGGERS; i++) { + buf = bootup_trigger_buf + len; + boot_trigger_buf_len += strlen(buf) + 1; + + for (i = nr_boot_triggers; i < MAX_BOOT_TRIGGERS; i++) { trigger = strsep(&buf, ","); if (!trigger) break; @@ -3548,7 +3839,7 @@ static int probe_remove_event_call(struct trace_event_call *call) continue; /* * We can't rely on ftrace_event_enable_disable(enable => 0) - * we are going to do, EVENT_FILE_FL_SOFT_MODE can suppress + * we are going to do, soft mode can suppress * TRACE_REG_UNREGISTER. */ if (file->flags & EVENT_FILE_FL_ENABLED) @@ -3659,7 +3950,7 @@ static void trace_module_remove_events(struct module *mod) if (call->module == mod) __trace_remove_event_call(call); } - /* Check for any strings allocade for this module */ + /* Check for any strings allocated for this module */ list_for_each_entry_safe(modstr, m, &module_strings, next) { if (modstr->module != mod) continue; @@ -3714,6 +4005,8 @@ __trace_add_event_dirs(struct trace_array *tr) struct trace_event_call *call; int ret; + lockdep_assert_held(&trace_event_sem); + list_for_each_entry(call, &ftrace_events, list) { ret = __trace_add_new_event(call, tr); if (ret < 0) @@ -3836,11 +4129,6 @@ void trace_put_event_file(struct trace_event_file *file) EXPORT_SYMBOL_GPL(trace_put_event_file); #ifdef CONFIG_DYNAMIC_FTRACE - -/* Avoid typos */ -#define ENABLE_EVENT_STR "enable_event" -#define DISABLE_EVENT_STR "disable_event" - struct event_probe_data { struct trace_event_file *file; unsigned long count; @@ -3961,7 +4249,7 @@ static int free_probe_data(void *data) edata->ref--; if (!edata->ref) { - /* Remove the SOFT_MODE flag */ + /* Remove soft mode */ __ftrace_event_enable_disable(edata->file, 0, 1); trace_event_put_ref(edata->file->event_call); kfree(edata); @@ -4090,7 +4378,7 @@ event_enable_func(struct trace_array *tr, struct ftrace_hash *hash, goto out_put; ret = -ENOMEM; - data = kzalloc(sizeof(*data), GFP_KERNEL); + data = kzalloc_obj(*data); if (!data) goto out_put; @@ -4216,7 +4504,11 @@ static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata; static __init int setup_trace_event(char *str) { - strscpy(bootup_event_buf, str, COMMAND_LINE_SIZE); + if (bootup_event_buf[0] != '\0') + strlcat(bootup_event_buf, ",", COMMAND_LINE_SIZE); + + strlcat(bootup_event_buf, str, COMMAND_LINE_SIZE); + trace_set_ring_buffer_expanded(NULL); disable_tracing_selftest("running event tracing"); @@ -4255,25 +4547,44 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) int nr_entries; static struct eventfs_entry events_entries[] = { { - .name = "enable", + .name = "header_page", .callback = events_callback, }, { - .name = "header_page", + .name = "header_event", .callback = events_callback, }, +#define NR_RO_TOP_ENTRIES 2 +/* Readonly files must be above this line and counted by NR_RO_TOP_ENTRIES. */ { - .name = "header_event", + .name = "enable", .callback = events_callback, }, }; - entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent, - tr, &ftrace_set_event_fops); - if (!entry) - return -ENOMEM; + if (!trace_array_is_readonly(tr)) { + entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent, + tr, &ftrace_set_event_fops); + if (!entry) + return -ENOMEM; - nr_entries = ARRAY_SIZE(events_entries); + /* There are not as crucial, just warn if they are not created */ + trace_create_file("show_event_filters", TRACE_MODE_READ, parent, tr, + &ftrace_show_event_filters_fops); + + trace_create_file("show_event_triggers", TRACE_MODE_READ, parent, tr, + &ftrace_show_event_triggers_fops); + + trace_create_file("set_event_pid", TRACE_MODE_WRITE, parent, + tr, &ftrace_set_event_pid_fops); + + trace_create_file("set_event_notrace_pid", + TRACE_MODE_WRITE, parent, tr, + &ftrace_set_event_notrace_pid_fops); + nr_entries = ARRAY_SIZE(events_entries); + } else { + nr_entries = NR_RO_TOP_ENTRIES; + } e_events = eventfs_create_events_dir("events", parent, events_entries, nr_entries, tr); @@ -4282,15 +4593,6 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) return -ENOMEM; } - /* There are not as crucial, just warn if they are not created */ - - trace_create_file("set_event_pid", TRACE_MODE_WRITE, parent, - tr, &ftrace_set_event_pid_fops); - - trace_create_file("set_event_notrace_pid", - TRACE_MODE_WRITE, parent, tr, - &ftrace_set_event_notrace_pid_fops); - tr->event_dir = e_events; return 0; @@ -4387,26 +4689,22 @@ static __init int event_trace_memsetup(void) return 0; } -__init void -early_enable_events(struct trace_array *tr, char *buf, bool disable_first) +/* + * Helper function to enable or disable a comma-separated list of events + * from the bootup buffer. + */ +static __init void __early_set_events(struct trace_array *tr, char *buf, bool enable) { char *token; - int ret; - - while (true) { - token = strsep(&buf, ","); - - if (!token) - break; + while ((token = strsep(&buf, ","))) { if (*token) { - /* Restarting syscalls requires that we stop them first */ - if (disable_first) + if (enable) { + if (ftrace_set_clr_event(tr, token, 1)) + pr_warn("Failed to enable trace event: %s\n", token); + } else { ftrace_set_clr_event(tr, token, 0); - - ret = ftrace_set_clr_event(tr, token, 1); - if (ret) - pr_warn("Failed to enable trace event: %s\n", token); + } } /* Put back the comma to allow this to be called again */ @@ -4415,6 +4713,32 @@ early_enable_events(struct trace_array *tr, char *buf, bool disable_first) } } +/** + * early_enable_events - enable events from the bootup buffer + * @tr: The trace array to enable the events in + * @buf: The buffer containing the comma separated list of events + * @disable_first: If true, disable all events in @buf before enabling them + * + * This function enables events from the bootup buffer. If @disable_first + * is true, it will first disable all events in the buffer before enabling + * them. + * + * For syscall events, which rely on a global refcount to register the + * SYSCALL_WORK_SYSCALL_TRACEPOINT flag (especially for pid 1), we must + * ensure the refcount hits zero before re-enabling them. A simple + * "disable then enable" per-event is not enough if multiple syscalls are + * used, as the refcount will stay above zero. Thus, we need a two-phase + * approach: disable all, then enable all. + */ +__init void +early_enable_events(struct trace_array *tr, char *buf, bool disable_first) +{ + if (disable_first) + __early_set_events(tr, buf, false); + + __early_set_events(tr, buf, true); +} + static __init int event_trace_enable(void) { struct trace_array *tr = top_trace_array(); |
