diff options
author | Waiman Long <longman@redhat.com> | 2017-01-30 12:57:43 -0500 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2017-02-04 08:54:46 +0100 |
commit | 668802c25729a8e3423015c33c05f1c3be3858e9 (patch) | |
tree | 86bcd7436b39f4a4273bdace08a0b86d14e8196c /include/linux/cpumask.h | |
parent | 9556ad6ad0c60a23a7db36af65c9ffff51bbf644 (diff) | |
download | lwn-668802c25729a8e3423015c33c05f1c3be3858e9.tar.gz lwn-668802c25729a8e3423015c33c05f1c3be3858e9.zip |
tick/broadcast: Reduce lock cacheline contention
It was observed that on an Intel x86 system without the ARAT (Always
running APIC timer) feature and with a fairly large number of CPUs as
well as CPUs coming in and out of intel_idle frequently, the lock
contention on the tick_broadcast_lock can become significant.
To reduce contention, the lock is put into its own cacheline and all
the cpumask_var_t variables are put into the __read_mostly section.
Running the SP benchmark of the NAS Parallel Benchmarks on a 4-socket
16-core 32-thread Nehalem system, the performance number improved
from 3353.94 Mop/s to 3469.31 Mop/s when this patch was applied on
a 4.9.6 kernel. This is a 3.4% improvement.
Signed-off-by: Waiman Long <longman@redhat.com>
Cc: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/1485799063-20857-1-git-send-email-longman@redhat.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'include/linux/cpumask.h')
-rw-r--r-- | include/linux/cpumask.h | 7 |
1 files changed, 6 insertions, 1 deletions
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index c717f5ea88cb..23c1a6d09ec5 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -649,11 +649,15 @@ static inline size_t cpumask_size(void)
  * used. Please use this_cpu_cpumask_var_t in those cases. The direct use
  * of this_cpu_ptr() or this_cpu_read() will lead to failures when the
  * other type of cpumask_var_t implementation is configured.
+ *
+ * Please also note that __cpumask_var_read_mostly can be used to declare
+ * a cpumask_var_t variable itself (not its content) as read mostly.
  */
 #ifdef CONFIG_CPUMASK_OFFSTACK
 typedef struct cpumask *cpumask_var_t;
 
-#define this_cpu_cpumask_var_ptr(x) this_cpu_read(x)
+#define this_cpu_cpumask_var_ptr(x) this_cpu_read(x)
+#define __cpumask_var_read_mostly __read_mostly
 
 bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node);
 bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags);
@@ -667,6 +671,7 @@ void free_bootmem_cpumask_var(cpumask_var_t mask);
 typedef struct cpumask cpumask_var_t[1];
 
 #define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x)
+#define __cpumask_var_read_mostly
 
 static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
 {