From bf378d341e4873ed928dc3c636252e6895a21f50 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 28 Oct 2013 13:55:29 +0100 Subject: perf: Fix perf ring buffer memory ordering The PPC64 people noticed a missing memory barrier and crufty old comments in the perf ring buffer code. So update all the comments and add the missing barrier. When the architecture implements local_t using atomic_long_t there will be double barriers issued; but short of introducing more conditional barrier primitives this is the best we can do. Reported-by: Victor Kaplansky Tested-by: Victor Kaplansky Signed-off-by: Peter Zijlstra Cc: Mathieu Desnoyers Cc: michael@ellerman.id.au Cc: Paul McKenney Cc: Michael Neuling Cc: Frederic Weisbecker Cc: anton@samba.org Cc: benh@kernel.crashing.org Link: http://lkml.kernel.org/r/20131025173749.GG19466@laptop.lan Signed-off-by: Ingo Molnar --- include/uapi/linux/perf_event.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 009a655a5d35..2fc1602e23bb 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -456,13 +456,15 @@ struct perf_event_mmap_page { /* * Control data for the mmap() data buffer. * - * User-space reading the @data_head value should issue an rmb(), on - * SMP capable platforms, after reading this value -- see - * perf_event_wakeup(). + * User-space reading the @data_head value should issue an smp_rmb(), + * after reading this value. * * When the mapping is PROT_WRITE the @data_tail value should be - * written by userspace to reflect the last read data. In this case - * the kernel will not over-write unread data. + * written by userspace to reflect the last read data, after issueing + * an smp_mb() to separate the data read from the ->data_tail store. + * In this case the kernel will not over-write unread data. + * + * See perf_output_put_handle() for the data ordering. */ __u64 data_head; /* head in the data section */ __u64 data_tail; /* user-space written tail */ -- cgit v1.2.3 From bd09d9a35111b6ffc0c7585d3853d0ec7f9f1eb4 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Wed, 30 Oct 2013 13:56:20 -0700 Subject: percpu: fix this_cpu_sub() subtrahend casting for unsigneds this_cpu_sub() is implemented as negation and addition. This patch casts the adjustment to the counter type before negation to sign extend the adjustment. This helps in cases where the counter type is wider than an unsigned adjustment. An alternative to this patch is to declare such operations unsupported, but it seemed useful to avoid surprises. This patch specifically helps the following example: unsigned int delta = 1 preempt_disable() this_cpu_write(long_counter, 0) this_cpu_sub(long_counter, delta) preempt_enable() Before this change long_counter on a 64 bit machine ends with value 0xffffffff, rather than 0xffffffffffffffff. This is because this_cpu_sub(pcp, delta) boils down to this_cpu_add(pcp, -delta), which is basically: long_counter = 0 + 0xffffffff Also apply the same cast to: __this_cpu_sub() __this_cpu_sub_return() this_cpu_sub_return() All percpu_test.ko passes, especially the following cases which previously failed: l -= ui_one; __this_cpu_sub(long_counter, ui_one); CHECK(l, long_counter, -1); l -= ui_one; this_cpu_sub(long_counter, ui_one); CHECK(l, long_counter, -1); CHECK(l, long_counter, 0xffffffffffffffff); ul -= ui_one; __this_cpu_sub(ulong_counter, ui_one); CHECK(ul, ulong_counter, -1); CHECK(ul, ulong_counter, 0xffffffffffffffff); ul = this_cpu_sub_return(ulong_counter, ui_one); CHECK(ul, ulong_counter, 2); ul = __this_cpu_sub_return(ulong_counter, ui_one); CHECK(ul, ulong_counter, 1); Signed-off-by: Greg Thelen Acked-by: Tejun Heo Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/percpu.h | 3 ++- include/linux/percpu.h | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 0da5200ee79d..b3e18f800302 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -128,7 +128,8 @@ do { \ do { \ typedef typeof(var) pao_T__; \ const int pao_ID__ = (__builtin_constant_p(val) && \ - ((val) == 1 || (val) == -1)) ? (val) : 0; \ + ((val) == 1 || (val) == -1)) ? \ + (int)(val) : 0; \ if (0) { \ pao_T__ pao_tmp__; \ pao_tmp__ = (val); \ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index cc88172c7d9a..c74088ab103b 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -332,7 +332,7 @@ do { \ #endif #ifndef this_cpu_sub -# define this_cpu_sub(pcp, val) this_cpu_add((pcp), -(val)) +# define this_cpu_sub(pcp, val) this_cpu_add((pcp), -(typeof(pcp))(val)) #endif #ifndef this_cpu_inc @@ -418,7 +418,7 @@ do { \ # define this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val) #endif -#define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(val)) +#define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(typeof(pcp))(val)) #define this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1) #define this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1) @@ -586,7 +586,7 @@ do { \ #endif #ifndef __this_cpu_sub -# define __this_cpu_sub(pcp, val) __this_cpu_add((pcp), -(val)) +# define __this_cpu_sub(pcp, val) __this_cpu_add((pcp), -(typeof(pcp))(val)) #endif #ifndef __this_cpu_inc @@ -668,7 +668,7 @@ do { \ __pcpu_size_call_return2(__this_cpu_add_return_, pcp, val) #endif -#define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(val)) +#define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val)) #define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1) #define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1) -- cgit v1.2.3 From 9bf76ca325d5e9208eb343f7bd4cc666f703ed30 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 3 Nov 2013 12:36:28 +0100 Subject: ipc, msg: forbid negative values for "msg{max,mnb,mni}" Negative message lengths make no sense -- so don't do negative queue lenghts or identifier counts. Prevent them from getting negative. Also change the underlying data types to be unsigned to avoid hairy surprises with sign extensions in cases where those variables get evaluated in unsigned expressions with bigger data types, e.g size_t. In case a user still wants to have "unlimited" sizes she could just use INT_MAX instead. Signed-off-by: Mathias Krause Cc: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 6 +++--- ipc/ipc_sysctl.c | 20 ++++++++++++-------- 2 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 19c19a5eee29..f6c82de12541 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -34,9 +34,9 @@ struct ipc_namespace { int sem_ctls[4]; int used_sems; - int msg_ctlmax; - int msg_ctlmnb; - int msg_ctlmni; + unsigned int msg_ctlmax; + unsigned int msg_ctlmnb; + unsigned int msg_ctlmni; atomic_t msg_bytes; atomic_t msg_hdrs; int auto_msgmni; diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 130dfece27ac..b0e99deb6d05 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -62,7 +62,7 @@ static int proc_ipc_dointvec_minmax_orphans(ctl_table *table, int write, return err; } -static int proc_ipc_callback_dointvec(ctl_table *table, int write, +static int proc_ipc_callback_dointvec_minmax(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; @@ -72,7 +72,7 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write, memcpy(&ipc_table, table, sizeof(ipc_table)); ipc_table.data = get_ipc(table); - rc = proc_dointvec(&ipc_table, write, buffer, lenp, ppos); + rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); if (write && !rc && lenp_bef == *lenp) /* @@ -152,15 +152,13 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, #define proc_ipc_dointvec NULL #define proc_ipc_dointvec_minmax NULL #define proc_ipc_dointvec_minmax_orphans NULL -#define proc_ipc_callback_dointvec NULL +#define proc_ipc_callback_dointvec_minmax NULL #define proc_ipcauto_dointvec_minmax NULL #endif static int zero; static int one = 1; -#ifdef CONFIG_CHECKPOINT_RESTORE static int int_max = INT_MAX; -#endif static struct ctl_table ipc_kern_table[] = { { @@ -198,21 +196,27 @@ static struct ctl_table ipc_kern_table[] = { .data = &init_ipc_ns.msg_ctlmax, .maxlen = sizeof (init_ipc_ns.msg_ctlmax), .mode = 0644, - .proc_handler = proc_ipc_dointvec, + .proc_handler = proc_ipc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &int_max, }, { .procname = "msgmni", .data = &init_ipc_ns.msg_ctlmni, .maxlen = sizeof (init_ipc_ns.msg_ctlmni), .mode = 0644, - .proc_handler = proc_ipc_callback_dointvec, + .proc_handler = proc_ipc_callback_dointvec_minmax, + .extra1 = &zero, + .extra2 = &int_max, }, { .procname = "msgmnb", .data = &init_ipc_ns.msg_ctlmnb, .maxlen = sizeof (init_ipc_ns.msg_ctlmnb), .mode = 0644, - .proc_handler = proc_ipc_dointvec, + .proc_handler = proc_ipc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &int_max, }, { .procname = "sem", -- cgit v1.2.3