diff options
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/tlb.c | 46 |
1 files changed, 29 insertions, 17 deletions
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 07b6701a540a..8db87cd92e6b 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -24,7 +24,7 @@ # define __flush_tlb_local native_flush_tlb_local # define __flush_tlb_global native_flush_tlb_global # define __flush_tlb_one_user(addr) native_flush_tlb_one_user(addr) -# define __flush_tlb_others(msk, info) native_flush_tlb_others(msk, info) +# define __flush_tlb_multi(msk, info) native_flush_tlb_multi(msk, info) #endif /* @@ -490,7 +490,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, /* * Even in lazy TLB mode, the CPU should stay set in the * mm_cpumask. The TLB shootdown code can figure out from - * from cpu_tlbstate.is_lazy whether or not to send an IPI. + * cpu_tlbstate.is_lazy whether or not to send an IPI. */ if (WARN_ON_ONCE(real_prev != &init_mm && !cpumask_test_cpu(cpu, mm_cpumask(next)))) @@ -697,7 +697,7 @@ static void flush_tlb_func(void *info) * garbage into our TLB. Since switching to init_mm is barely * slower than a minimal flush, just switch to init_mm. * - * This should be rare, with native_flush_tlb_others skipping + * This should be rare, with native_flush_tlb_multi() skipping * IPIs to lazy TLB mode CPUs. */ switch_mm_irqs_off(NULL, &init_mm, NULL); @@ -795,9 +795,14 @@ static bool tlb_is_not_lazy(int cpu) static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask); -STATIC_NOPV void native_flush_tlb_others(const struct cpumask *cpumask, +STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask, const struct flush_tlb_info *info) { + /* + * Do accounting and tracing. Note that there are (and have always been) + * cases in which a remote TLB flush will be traced, but eventually + * would not happen. + */ count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); if (info->end == TLB_FLUSH_ALL) trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL); @@ -816,8 +821,7 @@ STATIC_NOPV void native_flush_tlb_others(const struct cpumask *cpumask, * doing a speculative memory access. */ if (info->freed_tables) { - smp_call_function_many(cpumask, flush_tlb_func, - (void *)info, 1); + on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true); } else { /* * Although we could have used on_each_cpu_cond_mask(), @@ -844,14 +848,14 @@ STATIC_NOPV void native_flush_tlb_others(const struct cpumask *cpumask, if (tlb_is_not_lazy(cpu)) __cpumask_set_cpu(cpu, cond_cpumask); } - smp_call_function_many(cond_cpumask, flush_tlb_func, (void *)info, 1); + on_each_cpu_mask(cond_cpumask, flush_tlb_func, (void *)info, true); } } -void flush_tlb_others(const struct cpumask *cpumask, +void flush_tlb_multi(const struct cpumask *cpumask, const struct flush_tlb_info *info) { - __flush_tlb_others(cpumask, info); + __flush_tlb_multi(cpumask, info); } /* @@ -931,16 +935,20 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, info = get_flush_tlb_info(mm, start, end, stride_shift, freed_tables, new_tlb_gen); - if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) { + /* + * flush_tlb_multi() is not optimized for the common case in which only + * a local TLB flush is needed. Optimize this use-case by calling + * flush_tlb_func_local() directly in this case. + */ + if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) { + flush_tlb_multi(mm_cpumask(mm), info); + } else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) { lockdep_assert_irqs_enabled(); local_irq_disable(); flush_tlb_func(info); local_irq_enable(); } - if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) - flush_tlb_others(mm_cpumask(mm), info); - put_flush_tlb_info(); put_cpu(); } @@ -1152,16 +1160,20 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) int cpu = get_cpu(); info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false, 0); - if (cpumask_test_cpu(cpu, &batch->cpumask)) { + /* + * flush_tlb_multi() is not optimized for the common case in which only + * a local TLB flush is needed. Optimize this use-case by calling + * flush_tlb_func_local() directly in this case. + */ + if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) { + flush_tlb_multi(&batch->cpumask, info); + } else if (cpumask_test_cpu(cpu, &batch->cpumask)) { lockdep_assert_irqs_enabled(); local_irq_disable(); flush_tlb_func(info); local_irq_enable(); } - if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) - flush_tlb_others(&batch->cpumask, info); - cpumask_clear(&batch->cpumask); put_flush_tlb_info(); |