From d71f290b4e98a39f49f2595a13be3b4d5ce8e1f1 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 13 May 2014 23:58:24 +0100 Subject: metag: Reduce maximum stack size to 256MB Specify the maximum stack size for arches where the stack grows upward (parisc and metag) in asm/processor.h rather than hard coding in fs/exec.c so that metag can specify a smaller value of 256MB rather than 1GB. This fixes a BUG on metag if the RLIMIT_STACK hard limit is increased beyond a safe value by root. E.g. when starting a process after running "ulimit -H -s unlimited" it will then attempt to use a stack size of the maximum 1GB which is far too big for metag's limited user virtual address space (stack_top is usually 0x3ffff000): BUG: failure at fs/exec.c:589/shift_arg_pages()! Signed-off-by: James Hogan Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: linux-parisc@vger.kernel.org Cc: linux-metag@vger.kernel.org Cc: John David Anglin Cc: stable@vger.kernel.org # only needed for >= v3.9 (arch/metag) --- fs/exec.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 476f3ebf437e..238b7aa26f68 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -657,10 +657,10 @@ int setup_arg_pages(struct linux_binprm *bprm, unsigned long rlim_stack; #ifdef CONFIG_STACK_GROWSUP - /* Limit stack size to 1GB */ + /* Limit stack size */ stack_base = rlimit_max(RLIMIT_STACK); - if (stack_base > (1 << 30)) - stack_base = 1 << 30; + if (stack_base > STACK_SIZE_MAX) + stack_base = STACK_SIZE_MAX; /* Make sure we didn't let the argument array grow too large. */ if (vma->vm_end - vma->vm_start > stack_base) -- cgit v1.2.3 From e041e328c4b41e1db79bfe5ba9992c2ed771ad19 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 21 May 2014 17:32:19 +0200 Subject: perf: Fix perf_event_comm() vs. exec() assumption perf_event_comm() assumes that set_task_comm() is only called on exec(), and in particular that its only called on current. Neither are true, as Dave reported a WARN triggered by set_task_comm() being called on !current. Separate the exec() hook from the comm hook. Reported-by: Dave Jones Signed-off-by: Peter Zijlstra Cc: Alexander Viro Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: linux-fsdevel@vger.kernel.org Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/20140521153219.GH5226@laptop.programming.kicks-ass.net [ Build fix. ] Signed-off-by: Ingo Molnar --- fs/exec.c | 1 + include/linux/perf_event.h | 4 +++- kernel/events/core.c | 28 ++++++++++++++++------------ 3 files changed, 20 insertions(+), 13 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 238b7aa26f68..a038a41a3677 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1110,6 +1110,7 @@ void setup_new_exec(struct linux_binprm * bprm) else set_dumpable(current->mm, suid_dumpable); + perf_event_exec(); set_task_comm(current, kbasename(bprm->filename)); /* Set the new mm task size. We have to do that late because it may diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3ef6ea12806a..9b5cd1992a88 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -695,6 +695,7 @@ extern struct perf_guest_info_callbacks *perf_guest_cbs; extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); +extern void perf_event_exec(void); extern void perf_event_comm(struct task_struct *tsk); extern void perf_event_fork(struct task_struct *tsk); @@ -772,7 +773,7 @@ extern void perf_event_enable(struct perf_event *event); extern void perf_event_disable(struct perf_event *event); extern int __perf_event_disable(void *info); extern void perf_event_task_tick(void); -#else +#else /* !CONFIG_PERF_EVENTS: */ static inline void perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task) { } @@ -802,6 +803,7 @@ static inline int perf_unregister_guest_info_callbacks (struct perf_guest_info_callbacks *callbacks) { return 0; } static inline void perf_event_mmap(struct vm_area_struct *vma) { } +static inline void perf_event_exec(void) { } static inline void perf_event_comm(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } diff --git a/kernel/events/core.c b/kernel/events/core.c index 440eefc67397..647698f91988 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2970,6 +2970,22 @@ out: local_irq_restore(flags); } +void perf_event_exec(void) +{ + struct perf_event_context *ctx; + int ctxn; + + rcu_read_lock(); + for_each_task_context_nr(ctxn) { + ctx = current->perf_event_ctxp[ctxn]; + if (!ctx) + continue; + + perf_event_enable_on_exec(ctx); + } + rcu_read_unlock(); +} + /* * Cross CPU call to read the hardware event */ @@ -5057,18 +5073,6 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) void perf_event_comm(struct task_struct *task) { struct perf_comm_event comm_event; - struct perf_event_context *ctx; - int ctxn; - - rcu_read_lock(); - for_each_task_context_nr(ctxn) { - ctx = task->perf_event_ctxp[ctxn]; - if (!ctx) - continue; - - perf_event_enable_on_exec(ctx); - } - rcu_read_unlock(); if (!atomic_read(&nr_comm_events)) return; -- cgit v1.2.3 From 82b897782d10fcc4930c9d4a15b175348fdd2871 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 28 May 2014 11:45:04 +0300 Subject: perf: Differentiate exec() and non-exec() comm events perf tools like 'perf report' can aggregate samples by comm strings, which generally works. However, there are other potential use-cases. For example, to pair up 'calls' with 'returns' accurately (from branch events like Intel BTS) it is necessary to identify whether the process has exec'd. Although a comm event is generated when an 'exec' happens it is also generated whenever the comm string is changed on a whim (e.g. by prctl PR_SET_NAME). This patch adds a flag to the comm event to differentiate one case from the other. In order to determine whether the kernel supports the new flag, a selection bit named 'exec' is added to struct perf_event_attr. The bit does nothing but will cause perf_event_open() to fail if the bit is set on kernels that do not have it defined. Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/537D9EBE.7030806@intel.com Cc: Paul Mackerras Cc: Dave Jones Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Alexander Viro Cc: Linus Torvalds Cc: linux-fsdevel@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- fs/exec.c | 6 +++--- include/linux/perf_event.h | 4 ++-- include/linux/sched.h | 6 +++++- include/uapi/linux/perf_event.h | 9 +++++++-- kernel/events/core.c | 4 ++-- 5 files changed, 19 insertions(+), 10 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index a038a41a3677..a3d33fe592d6 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1046,13 +1046,13 @@ EXPORT_SYMBOL_GPL(get_task_comm); * so that a new one can be started */ -void set_task_comm(struct task_struct *tsk, const char *buf) +void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec) { task_lock(tsk); trace_task_rename(tsk, buf); strlcpy(tsk->comm, buf, sizeof(tsk->comm)); task_unlock(tsk); - perf_event_comm(tsk); + perf_event_comm(tsk, exec); } int flush_old_exec(struct linux_binprm * bprm) @@ -1111,7 +1111,7 @@ void setup_new_exec(struct linux_binprm * bprm) set_dumpable(current->mm, suid_dumpable); perf_event_exec(); - set_task_comm(current, kbasename(bprm->filename)); + __set_task_comm(current, kbasename(bprm->filename), true); /* Set the new mm task size. We have to do that late because it may * depend on TIF_32BIT which is only updated in flush_thread() on diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b4c1d4685bf0..707617a8c0f6 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -707,7 +707,7 @@ extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks * extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); extern void perf_event_exec(void); -extern void perf_event_comm(struct task_struct *tsk); +extern void perf_event_comm(struct task_struct *tsk, bool exec); extern void perf_event_fork(struct task_struct *tsk); /* Callchains */ @@ -815,7 +815,7 @@ static inline int perf_unregister_guest_info_callbacks static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_exec(void) { } -static inline void perf_event_comm(struct task_struct *tsk) { } +static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } static inline int perf_swevent_get_recursion_context(void) { return -1; } diff --git a/include/linux/sched.h b/include/linux/sched.h index 221b2bde3723..ad86e1d7dbc2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2379,7 +2379,11 @@ extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, i struct task_struct *fork_idle(int); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); -extern void set_task_comm(struct task_struct *tsk, const char *from); +extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); +static inline void set_task_comm(struct task_struct *tsk, const char *from) +{ + __set_task_comm(tsk, from, false); +} extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index d9cd853818ad..5312fae47218 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -302,8 +302,8 @@ struct perf_event_attr { exclude_callchain_kernel : 1, /* exclude kernel callchains */ exclude_callchain_user : 1, /* exclude user callchains */ mmap2 : 1, /* include mmap with inode data */ - - __reserved_1 : 40; + comm_exec : 1, /* flag comm events that are due to an exec */ + __reserved_1 : 39; union { __u32 wakeup_events; /* wakeup every n events */ @@ -502,7 +502,12 @@ struct perf_event_mmap_page { #define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0) #define PERF_RECORD_MISC_GUEST_USER (5 << 0) +/* + * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on + * different events so can reuse the same bit position. + */ #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) +#define PERF_RECORD_MISC_COMM_EXEC (1 << 13) /* * Indicates that the content of PERF_SAMPLE_IP points to * the actual instruction that triggered the event. See also diff --git a/kernel/events/core.c b/kernel/events/core.c index 8fac2056d51e..7da5e561e89a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5090,7 +5090,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) NULL); } -void perf_event_comm(struct task_struct *task) +void perf_event_comm(struct task_struct *task, bool exec) { struct perf_comm_event comm_event; @@ -5104,7 +5104,7 @@ void perf_event_comm(struct task_struct *task) .event_id = { .header = { .type = PERF_RECORD_COMM, - .misc = 0, + .misc = exec ? PERF_RECORD_MISC_COMM_EXEC : 0, /* .size */ }, /* .pid */ -- cgit v1.2.3