From 87be28aaf1458445d5f648688c2eec0f13b8f3b9 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 25 Jun 2015 18:43:58 +0200 Subject: x86/asm/tsc: Replace rdtscll() with native_read_tsc() Now that the ->read_tsc() paravirt hook is gone, rdtscll() is just a wrapper around native_read_tsc(). Unwrap it. Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Huang Rui Cc: John Stultz Cc: Len Brown Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Thomas Gleixner Cc: kvm ML Link: http://lkml.kernel.org/r/d2449ae62c1b1fb90195bcfb19ef4a35883a04dc.1434501121.git.luto@kernel.org Signed-off-by: Ingo Molnar --- tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c b/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c index 5224ee5b392d..f02b0c0bff9b 100644 --- a/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c +++ b/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c @@ -81,11 +81,11 @@ static int __init cpufreq_test_tsc(void) printk(KERN_DEBUG "start--> \n"); then = read_pmtmr(); - rdtscll(then_tsc); + then_tsc = native_read_tsc(); for (i=0;i<20;i++) { mdelay(100); now = read_pmtmr(); - rdtscll(now_tsc); + now_tsc = native_read_tsc(); diff = (now - then) & 0xFFFFFF; diff_tsc = now_tsc - then_tsc; printk(KERN_DEBUG "t1: %08u t2: %08u diff_pmtmr: %08u diff_tsc: %016llu\n", then, now, diff, diff_tsc); -- cgit v1.2.3 From 4ea1636b04dbd66536fa387bae2eea463efc705b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 25 Jun 2015 18:44:07 +0200 Subject: x86/asm/tsc: Rename native_read_tsc() to rdtsc() Now that there is no paravirt TSC, the "native" is inappropriate. The function does RDTSC, so give it the obvious name: rdtsc(). Suggested-by: Borislav Petkov Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Huang Rui Cc: John Stultz Cc: Len Brown Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Thomas Gleixner Cc: kvm ML Link: http://lkml.kernel.org/r/fd43e16281991f096c1e4d21574d9e1402c62d39.1434501121.git.luto@kernel.org [ Ported it to v4.2-rc1. ] Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/aslr.c | 2 +- arch/x86/entry/vdso/vclock_gettime.c | 2 +- arch/x86/include/asm/msr.h | 11 ++++++++++- arch/x86/include/asm/pvclock.h | 2 +- arch/x86/include/asm/stackprotector.h | 2 +- arch/x86/include/asm/tsc.h | 2 +- arch/x86/kernel/apb_timer.c | 8 ++++---- arch/x86/kernel/apic/apic.c | 8 ++++---- arch/x86/kernel/cpu/amd.c | 4 ++-- arch/x86/kernel/cpu/mcheck/mce.c | 4 ++-- arch/x86/kernel/espfix_64.c | 2 +- arch/x86/kernel/hpet.c | 4 ++-- arch/x86/kernel/trace_clock.c | 2 +- arch/x86/kernel/tsc.c | 4 ++-- arch/x86/kvm/lapic.c | 4 ++-- arch/x86/kvm/svm.c | 4 ++-- arch/x86/kvm/vmx.c | 4 ++-- arch/x86/kvm/x86.c | 12 ++++++------ arch/x86/lib/delay.c | 8 ++++---- drivers/cpufreq/intel_pstate.c | 2 +- drivers/input/gameport/gameport.c | 4 ++-- drivers/input/joystick/analog.c | 4 ++-- drivers/net/hamradio/baycom_epp.c | 2 +- drivers/thermal/intel_powerclamp.c | 4 ++-- tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c | 4 ++-- 25 files changed, 59 insertions(+), 50 deletions(-) (limited to 'tools') diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c index ea33236190b1..6a9b96b4624d 100644 --- a/arch/x86/boot/compressed/aslr.c +++ b/arch/x86/boot/compressed/aslr.c @@ -82,7 +82,7 @@ static unsigned long get_random_long(void) if (has_cpuflag(X86_FEATURE_TSC)) { debug_putstr(" RDTSC"); - raw = native_read_tsc(); + raw = rdtsc(); random ^= raw; use_i8254 = false; diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 972b488ac16a..0340d93c18ca 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -186,7 +186,7 @@ notrace static cycle_t vread_tsc(void) * but no one has ever seen it happen. */ rdtsc_barrier(); - ret = (cycle_t)native_read_tsc(); + ret = (cycle_t)rdtsc(); last = gtod->cycle_last; diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index c89ed6ceed02..ff0c120dafe5 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -109,7 +109,16 @@ notrace static inline int native_write_msr_safe(unsigned int msr, extern int rdmsr_safe_regs(u32 regs[8]); extern int wrmsr_safe_regs(u32 regs[8]); -static __always_inline unsigned long long native_read_tsc(void) +/** + * rdtsc() - returns the current TSC without ordering constraints + * + * rdtsc() returns the result of RDTSC as a 64-bit integer. The + * only ordering constraint it supplies is the ordering implied by + * "asm volatile": it will put the RDTSC in the place you expect. The + * CPU can and will speculatively execute that RDTSC, though, so the + * results can be non-monotonic if compared on different CPUs. + */ +static __always_inline unsigned long long rdtsc(void) { DECLARE_ARGS(val, low, high); diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 2bd69d62c623..5c490db62e32 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -62,7 +62,7 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) static __always_inline u64 pvclock_get_nsec_offset(const struct pvclock_vcpu_time_info *src) { - u64 delta = native_read_tsc() - src->tsc_timestamp; + u64 delta = rdtsc() - src->tsc_timestamp; return pvclock_scale_delta(delta, src->tsc_to_system_mul, src->tsc_shift); } diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index bc5fa2af112e..58505f01962f 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -72,7 +72,7 @@ static __always_inline void boot_init_stack_canary(void) * on during the bootup the random pool has true entropy too. */ get_random_bytes(&canary, sizeof(canary)); - tsc = native_read_tsc(); + tsc = rdtsc(); canary += tsc + (tsc << 32UL); current->stack_canary = canary; diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index b4883902948b..3df7675debcf 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -26,7 +26,7 @@ static inline cycles_t get_cycles(void) return 0; #endif - return native_read_tsc(); + return rdtsc(); } extern void tsc_init(void); diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index 25efa534c4e4..222a57076039 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -263,7 +263,7 @@ static int apbt_clocksource_register(void) /* Verify whether apbt counter works */ t1 = dw_apb_clocksource_read(clocksource_apbt); - start = native_read_tsc(); + start = rdtsc(); /* * We don't know the TSC frequency yet, but waiting for @@ -273,7 +273,7 @@ static int apbt_clocksource_register(void) */ do { rep_nop(); - now = native_read_tsc(); + now = rdtsc(); } while ((now - start) < 200000UL); /* APBT is the only always on clocksource, it has to work! */ @@ -390,13 +390,13 @@ unsigned long apbt_quick_calibrate(void) old = dw_apb_clocksource_read(clocksource_apbt); old += loop; - t1 = native_read_tsc(); + t1 = rdtsc(); do { new = dw_apb_clocksource_read(clocksource_apbt); } while (new < old); - t2 = native_read_tsc(); + t2 = rdtsc(); shift = 5; if (unlikely(loop >> shift == 0)) { diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 51af1ed1ae2e..0d71cd9b4a50 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -457,7 +457,7 @@ static int lapic_next_deadline(unsigned long delta, { u64 tsc; - tsc = native_read_tsc(); + tsc = rdtsc(); wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR)); return 0; } @@ -592,7 +592,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev) unsigned long pm = acpi_pm_read_early(); if (cpu_has_tsc) - tsc = native_read_tsc(); + tsc = rdtsc(); switch (lapic_cal_loops++) { case 0: @@ -1209,7 +1209,7 @@ void setup_local_APIC(void) long long max_loops = cpu_khz ? cpu_khz : 1000000; if (cpu_has_tsc) - tsc = native_read_tsc(); + tsc = rdtsc(); if (disable_apic) { disable_ioapic_support(); @@ -1293,7 +1293,7 @@ void setup_local_APIC(void) } if (queued) { if (cpu_has_tsc && cpu_khz) { - ntsc = native_read_tsc(); + ntsc = rdtsc(); max_loops = (cpu_khz << 10) - (ntsc - tsc); } else max_loops--; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a69710db6112..51ad2af84a72 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -125,10 +125,10 @@ static void init_amd_k6(struct cpuinfo_x86 *c) n = K6_BUG_LOOP; f_vide = vide; - d = native_read_tsc(); + d = rdtsc(); while (n--) f_vide(); - d2 = native_read_tsc(); + d2 = rdtsc(); d = d2-d; if (d > 20*K6_BUG_LOOP) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a5283d2d0094..96cceccd11b4 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -125,7 +125,7 @@ void mce_setup(struct mce *m) { memset(m, 0, sizeof(struct mce)); m->cpu = m->extcpu = smp_processor_id(); - m->tsc = native_read_tsc(); + m->tsc = rdtsc(); /* We hope get_seconds stays lockless */ m->time = get_seconds(); m->cpuvendor = boot_cpu_data.x86_vendor; @@ -1784,7 +1784,7 @@ static void collect_tscs(void *data) { unsigned long *cpu_tsc = (unsigned long *)data; - cpu_tsc[smp_processor_id()] = native_read_tsc(); + cpu_tsc[smp_processor_id()] = rdtsc(); } static int mce_apei_read_done; diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 334a2a9c034d..67315cd0132c 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -110,7 +110,7 @@ static void init_espfix_random(void) */ if (!arch_get_random_long(&rand)) { /* The constant is an arbitrary large prime */ - rand = native_read_tsc(); + rand = rdtsc(); rand *= 0xc345c6b72fd16123UL; } diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index cc390fe69b71..f75c5908c7a6 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -735,7 +735,7 @@ static int hpet_clocksource_register(void) /* Verify whether hpet counter works */ t1 = hpet_readl(HPET_COUNTER); - start = native_read_tsc(); + start = rdtsc(); /* * We don't know the TSC frequency yet, but waiting for @@ -745,7 +745,7 @@ static int hpet_clocksource_register(void) */ do { rep_nop(); - now = native_read_tsc(); + now = rdtsc(); } while ((now - start) < 200000UL); if (t1 == hpet_readl(HPET_COUNTER)) { diff --git a/arch/x86/kernel/trace_clock.c b/arch/x86/kernel/trace_clock.c index bd8f4d41bd56..67efb8c96fc4 100644 --- a/arch/x86/kernel/trace_clock.c +++ b/arch/x86/kernel/trace_clock.c @@ -15,7 +15,7 @@ u64 notrace trace_clock_x86_tsc(void) u64 ret; rdtsc_barrier(); - ret = native_read_tsc(); + ret = rdtsc(); return ret; } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index e66f5dcaeb63..21d6e04e3e82 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -248,7 +248,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) data = cyc2ns_write_begin(cpu); - tsc_now = native_read_tsc(); + tsc_now = rdtsc(); ns_now = cycles_2_ns(tsc_now); /* @@ -290,7 +290,7 @@ u64 native_sched_clock(void) } /* read the Time Stamp Counter: */ - tsc_now = native_read_tsc(); + tsc_now = rdtsc(); /* return the value in ns */ return cycles_2_ns(tsc_now); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 954e98a8c2e3..2f0ade48614f 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1172,7 +1172,7 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) tsc_deadline = apic->lapic_timer.expired_tscdeadline; apic->lapic_timer.expired_tscdeadline = 0; - guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc()); + guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, rdtsc()); trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); /* __delay is delay_tsc whenever the hardware has TSC, thus always. */ @@ -1240,7 +1240,7 @@ static void start_apic_timer(struct kvm_lapic *apic) local_irq_save(flags); now = apic->lapic_timer.timer.base->get_time(); - guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc()); + guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, rdtsc()); if (likely(tscdeadline > guest_tsc)) { ns = (tscdeadline - guest_tsc) * 1000000ULL; do_div(ns, this_tsc_khz); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 602b974a60a6..8dfbad7a2c44 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1080,7 +1080,7 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) { u64 tsc; - tsc = svm_scale_tsc(vcpu, native_read_tsc()); + tsc = svm_scale_tsc(vcpu, rdtsc()); return target_tsc - tsc; } @@ -3079,7 +3079,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) switch (msr_info->index) { case MSR_IA32_TSC: { msr_info->data = svm->vmcb->control.tsc_offset + - svm_scale_tsc(vcpu, native_read_tsc()); + svm_scale_tsc(vcpu, rdtsc()); break; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4fa1ccad7beb..10d69a6df14f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2236,7 +2236,7 @@ static u64 guest_read_tsc(void) { u64 host_tsc, tsc_offset; - host_tsc = native_read_tsc(); + host_tsc = rdtsc(); tsc_offset = vmcs_read64(TSC_OFFSET); return host_tsc + tsc_offset; } @@ -2317,7 +2317,7 @@ static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) { - return target_tsc - native_read_tsc(); + return target_tsc - rdtsc(); } static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f771058cfb5c..dfa97139282d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1455,7 +1455,7 @@ static cycle_t read_tsc(void) * but no one has ever seen it happen. */ rdtsc_barrier(); - ret = (cycle_t)native_read_tsc(); + ret = (cycle_t)rdtsc(); last = pvclock_gtod_data.clock.cycle_last; @@ -1646,7 +1646,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) return 1; } if (!use_master_clock) { - host_tsc = native_read_tsc(); + host_tsc = rdtsc(); kernel_ns = get_kernel_ns(); } @@ -2810,7 +2810,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 : - native_read_tsc() - vcpu->arch.last_host_tsc; + rdtsc() - vcpu->arch.last_host_tsc; if (tsc_delta < 0) mark_tsc_unstable("KVM discovered backwards TSC"); if (check_tsc_unstable()) { @@ -2838,7 +2838,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { kvm_x86_ops->vcpu_put(vcpu); kvm_put_guest_fpu(vcpu); - vcpu->arch.last_host_tsc = native_read_tsc(); + vcpu->arch.last_host_tsc = rdtsc(); } static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, @@ -6623,7 +6623,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) hw_breakpoint_restore(); vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, - native_read_tsc()); + rdtsc()); vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); @@ -7437,7 +7437,7 @@ int kvm_arch_hardware_enable(void) if (ret != 0) return ret; - local_tsc = native_read_tsc(); + local_tsc = rdtsc(); stable = !check_tsc_unstable(); list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 35115f3786a9..f24bc59ab0a0 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -55,10 +55,10 @@ static void delay_tsc(unsigned long __loops) preempt_disable(); cpu = smp_processor_id(); rdtsc_barrier(); - bclock = native_read_tsc(); + bclock = rdtsc(); for (;;) { rdtsc_barrier(); - now = native_read_tsc(); + now = rdtsc(); if ((now - bclock) >= loops) break; @@ -80,7 +80,7 @@ static void delay_tsc(unsigned long __loops) loops -= (now - bclock); cpu = smp_processor_id(); rdtsc_barrier(); - bclock = native_read_tsc(); + bclock = rdtsc(); } } preempt_enable(); @@ -100,7 +100,7 @@ void use_tsc_delay(void) int read_current_timer(unsigned long *timer_val) { if (delay_fn == delay_tsc) { - *timer_val = native_read_tsc(); + *timer_val = rdtsc(); return 0; } return -1; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 15ada47bb720..7c56d7eaa671 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -765,7 +765,7 @@ static inline void intel_pstate_sample(struct cpudata *cpu) local_irq_save(flags); rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); - tsc = native_read_tsc(); + tsc = rdtsc(); local_irq_restore(flags); cpu->last_sample_time = cpu->sample.time; diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c index abc0cb22e750..4a2a9e370be7 100644 --- a/drivers/input/gameport/gameport.c +++ b/drivers/input/gameport/gameport.c @@ -149,9 +149,9 @@ static int old_gameport_measure_speed(struct gameport *gameport) for(i = 0; i < 50; i++) { local_irq_save(flags); - t1 = native_read_tsc(); + t1 = rdtsc(); for (t = 0; t < 50; t++) gameport_read(gameport); - t2 = native_read_tsc(); + t2 = rdtsc(); local_irq_restore(flags); udelay(i * 10); if (t2 - t1 < tx) tx = t2 - t1; diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c index f871b4f00056..6f8b084e13d0 100644 --- a/drivers/input/joystick/analog.c +++ b/drivers/input/joystick/analog.c @@ -143,7 +143,7 @@ struct analog_port { #include -#define GET_TIME(x) do { if (cpu_has_tsc) x = (unsigned int)native_read_tsc(); else x = get_time_pit(); } while (0) +#define GET_TIME(x) do { if (cpu_has_tsc) x = (unsigned int)rdtsc(); else x = get_time_pit(); } while (0) #define DELTA(x,y) (cpu_has_tsc ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? PIT_TICK_RATE / HZ : 0))) #define TIME_NAME (cpu_has_tsc?"TSC":"PIT") static unsigned int get_time_pit(void) @@ -160,7 +160,7 @@ static unsigned int get_time_pit(void) return count; } #elif defined(__x86_64__) -#define GET_TIME(x) do { x = (unsigned int)native_read_tsc(); } while (0) +#define GET_TIME(x) do { x = (unsigned int)rdtsc(); } while (0) #define DELTA(x,y) ((y)-(x)) #define TIME_NAME "TSC" #elif defined(__alpha__) || defined(CONFIG_MN10300) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_TILE) diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index 44e5c3b5e0af..72c9f1f352b4 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c @@ -638,7 +638,7 @@ static int receive(struct net_device *dev, int cnt) #define GETTICK(x) \ ({ \ if (cpu_has_tsc) \ - x = (unsigned int)native_read_tsc(); \ + x = (unsigned int)rdtsc(); \ }) #else /* __i386__ */ #define GETTICK(x) diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c index ab13448defcf..2ac0c704bcb8 100644 --- a/drivers/thermal/intel_powerclamp.c +++ b/drivers/thermal/intel_powerclamp.c @@ -340,7 +340,7 @@ static bool powerclamp_adjust_controls(unsigned int target_ratio, /* check result for the last window */ msr_now = pkg_state_counter(); - tsc_now = native_read_tsc(); + tsc_now = rdtsc(); /* calculate pkg cstate vs tsc ratio */ if (!msr_last || !tsc_last) @@ -482,7 +482,7 @@ static void poll_pkg_cstate(struct work_struct *dummy) u64 val64; msr_now = pkg_state_counter(); - tsc_now = native_read_tsc(); + tsc_now = rdtsc(); jiffies_now = jiffies; /* calculate pkg cstate vs tsc ratio */ diff --git a/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c b/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c index f02b0c0bff9b..6ff8383f2941 100644 --- a/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c +++ b/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c @@ -81,11 +81,11 @@ static int __init cpufreq_test_tsc(void) printk(KERN_DEBUG "start--> \n"); then = read_pmtmr(); - then_tsc = native_read_tsc(); + then_tsc = rdtsc(); for (i=0;i<20;i++) { mdelay(100); now = read_pmtmr(); - now_tsc = native_read_tsc(); + now_tsc = rdtsc(); diff = (now - then) & 0xFFFFFF; diff_tsc = now_tsc - then_tsc; printk(KERN_DEBUG "t1: %08u t2: %08u diff_pmtmr: %08u diff_tsc: %016llu\n", then, now, diff, diff_tsc); -- cgit v1.2.3 From 5e5c684a2c78b98dcba3d6fce56773a375f63980 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 3 Jul 2015 12:44:18 -0700 Subject: x86/entry, selftests/x86: Add a test for 32-bit fast syscall arg faults This test passes on 4.0 and fails on some newer kernels. Fortunately, the failure is likely not a big deal. This test will make sure that we don't break it further (e.g. OOPSing) as we clean up the entry code and that we eventually fix the regression. There's arguably no need to preserve the old ABI here -- anything that makes it into a fast (vDSO) syscall with a bad stack is about to crash no matter what we do. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Denys Vlasenko Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/9cfcc51005168cb1b06b31991931214d770fc59a.1435952415.git.luto@kernel.org Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/syscall_arg_fault.c | 130 ++++++++++++++++++++++++ 2 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/x86/syscall_arg_fault.c (limited to 'tools') diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index caa60d56d7d1..e8df47e6326c 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -5,7 +5,7 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean TARGETS_C_BOTHBITS := sigreturn single_step_syscall sysret_ss_attrs -TARGETS_C_32BIT_ONLY := entry_from_vm86 +TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32) diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c new file mode 100644 index 000000000000..7db4fc9fa09f --- /dev/null +++ b/tools/testing/selftests/x86/syscall_arg_fault.c @@ -0,0 +1,130 @@ +/* + * syscall_arg_fault.c - tests faults 32-bit fast syscall stack args + * Copyright (c) 2015 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Our sigaltstack scratch space. */ +static unsigned char altstack_data[SIGSTKSZ]; + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static volatile sig_atomic_t sig_traps; +static sigjmp_buf jmpbuf; + +static volatile sig_atomic_t n_errs; + +static void sigsegv(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + if (ctx->uc_mcontext.gregs[REG_EAX] != -EFAULT) { + printf("[FAIL]\tAX had the wrong value: 0x%x\n", + ctx->uc_mcontext.gregs[REG_EAX]); + n_errs++; + } else { + printf("[OK]\tSeems okay\n"); + } + + siglongjmp(jmpbuf, 1); +} + +static void sigill(int sig, siginfo_t *info, void *ctx_void) +{ + printf("[SKIP]\tIllegal instruction\n"); + siglongjmp(jmpbuf, 1); +} + +int main() +{ + stack_t stack = { + .ss_sp = altstack_data, + .ss_size = SIGSTKSZ, + }; + if (sigaltstack(&stack, NULL) != 0) + err(1, "sigaltstack"); + + sethandler(SIGSEGV, sigsegv, SA_ONSTACK); + sethandler(SIGILL, sigill, SA_ONSTACK); + + /* + * Exercise another nasty special case. The 32-bit SYSCALL + * and SYSENTER instructions (even in compat mode) each + * clobber one register. A Linux system call has a syscall + * number and six arguments, and the user stack pointer + * needs to live in some register on return. That means + * that we need eight registers, but SYSCALL and SYSENTER + * only preserve seven registers. As a result, one argument + * ends up on the stack. The stack is user memory, which + * means that the kernel can fail to read it. + * + * The 32-bit fast system calls don't have a defined ABI: + * we're supposed to invoke them through the vDSO. So we'll + * fudge it: we set all regs to invalid pointer values and + * invoke the entry instruction. The return will fail no + * matter what, and we completely lose our program state, + * but we can fix it up with a signal handler. + */ + + printf("[RUN]\tSYSENTER with invalid state\n"); + if (sigsetjmp(jmpbuf, 1) == 0) { + asm volatile ( + "movl $-1, %%eax\n\t" + "movl $-1, %%ebx\n\t" + "movl $-1, %%ecx\n\t" + "movl $-1, %%edx\n\t" + "movl $-1, %%esi\n\t" + "movl $-1, %%edi\n\t" + "movl $-1, %%ebp\n\t" + "movl $-1, %%esp\n\t" + "sysenter" + : : : "memory", "flags"); + } + + printf("[RUN]\tSYSCALL with invalid state\n"); + if (sigsetjmp(jmpbuf, 1) == 0) { + asm volatile ( + "movl $-1, %%eax\n\t" + "movl $-1, %%ebx\n\t" + "movl $-1, %%ecx\n\t" + "movl $-1, %%edx\n\t" + "movl $-1, %%esi\n\t" + "movl $-1, %%edi\n\t" + "movl $-1, %%ebp\n\t" + "movl $-1, %%esp\n\t" + "syscall\n\t" + "pushl $0" /* make sure we segfault cleanly */ + : : : "memory", "flags"); + } + + return 0; +} -- cgit v1.2.3 From f2a50f8b7da45ff2de93a71393e715a2ab9f3b68 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 9 Jul 2015 19:17:29 -0700 Subject: x86/selftests, x86/vm86: Improve entry_from_vm86 selftest The entry_from_vm86 selftest was very weak. Improve it: test more types of kernel entries from vm86 mode and test them more carefully. While we're at it, try to improve behavior on non-SEP CPUs. The old code was buggy because I misunderstood the intended semantics of #UD in vm86, so I didn't handle a possible signal. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Denys Vlasenko Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Shuah Khan Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/d8ef1d7368ac70d8342481563ed50f9a7d2eea6f.1436492057.git.luto@kernel.org Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/entry_from_vm86.c | 132 ++++++++++++++++++++++++-- 1 file changed, 124 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c index 5c38a187677b..f004b2a09916 100644 --- a/tools/testing/selftests/x86/entry_from_vm86.c +++ b/tools/testing/selftests/x86/entry_from_vm86.c @@ -28,6 +28,55 @@ static unsigned long load_addr = 0x10000; static int nerrs = 0; +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static void clearhandler(int sig) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static sig_atomic_t got_signal; + +static void sighandler(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + if (ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_VM || + (ctx->uc_mcontext.gregs[REG_CS] & 3) != 3) { + printf("[FAIL]\tSignal frame should not reflect vm86 mode\n"); + nerrs++; + } + + const char *signame; + if (sig == SIGSEGV) + signame = "SIGSEGV"; + else if (sig == SIGILL) + signame = "SIGILL"; + else + signame = "unexpected signal"; + + printf("[INFO]\t%s: FLAGS = 0x%lx, CS = 0x%hx\n", signame, + (unsigned long)ctx->uc_mcontext.gregs[REG_EFL], + (unsigned short)ctx->uc_mcontext.gregs[REG_CS]); + + got_signal = 1; +} + asm ( ".pushsection .rodata\n\t" ".type vmcode_bound, @object\n\t" @@ -38,6 +87,14 @@ asm ( "int3\n\t" "vmcode_sysenter:\n\t" "sysenter\n\t" + "vmcode_syscall:\n\t" + "syscall\n\t" + "vmcode_sti:\n\t" + "sti\n\t" + "vmcode_int3:\n\t" + "int3\n\t" + "vmcode_int80:\n\t" + "int $0x80\n\t" ".size vmcode, . - vmcode\n\t" "end_vmcode:\n\t" ".code32\n\t" @@ -45,9 +102,11 @@ asm ( ); extern unsigned char vmcode[], end_vmcode[]; -extern unsigned char vmcode_bound[], vmcode_sysenter[]; +extern unsigned char vmcode_bound[], vmcode_sysenter[], vmcode_syscall[], + vmcode_sti[], vmcode_int3[], vmcode_int80[]; static void do_test(struct vm86plus_struct *v86, unsigned long eip, + unsigned int rettype, unsigned int retarg, const char *text) { long ret; @@ -73,13 +132,28 @@ static void do_test(struct vm86plus_struct *v86, unsigned long eip, else sprintf(trapname, "%d", trapno); - printf("[OK]\tExited vm86 mode due to #%s\n", trapname); + printf("[INFO]\tExited vm86 mode due to #%s\n", trapname); } else if (VM86_TYPE(ret) == VM86_UNKNOWN) { - printf("[OK]\tExited vm86 mode due to unhandled GP fault\n"); + printf("[INFO]\tExited vm86 mode due to unhandled GP fault\n"); + } else if (VM86_TYPE(ret) == VM86_TRAP) { + printf("[INFO]\tExited vm86 mode due to a trap (arg=%ld)\n", + VM86_ARG(ret)); + } else if (VM86_TYPE(ret) == VM86_SIGNAL) { + printf("[INFO]\tExited vm86 mode due to a signal\n"); + } else if (VM86_TYPE(ret) == VM86_STI) { + printf("[INFO]\tExited vm86 mode due to STI\n"); } else { - printf("[OK]\tExited vm86 mode due to type %ld, arg %ld\n", + printf("[INFO]\tExited vm86 mode due to type %ld, arg %ld\n", VM86_TYPE(ret), VM86_ARG(ret)); } + + if (rettype == -1 || + (VM86_TYPE(ret) == rettype && VM86_ARG(ret) == retarg)) { + printf("[OK]\tReturned correctly\n"); + } else { + printf("[FAIL]\tIncorrect return reason\n"); + nerrs++; + } } int main(void) @@ -105,10 +179,52 @@ int main(void) assert((v86.regs.cs & 3) == 0); /* Looks like RPL = 0 */ /* #BR -- should deliver SIG??? */ - do_test(&v86, vmcode_bound - vmcode, "#BR"); - - /* SYSENTER -- should cause #GP or #UD depending on CPU */ - do_test(&v86, vmcode_sysenter - vmcode, "SYSENTER"); + do_test(&v86, vmcode_bound - vmcode, VM86_INTx, 5, "#BR"); + + /* + * SYSENTER -- should cause #GP or #UD depending on CPU. + * Expected return type -1 means that we shouldn't validate + * the vm86 return value. This will avoid problems on non-SEP + * CPUs. + */ + sethandler(SIGILL, sighandler, 0); + do_test(&v86, vmcode_sysenter - vmcode, -1, 0, "SYSENTER"); + clearhandler(SIGILL); + + /* + * SYSCALL would be a disaster in VM86 mode. Fortunately, + * there is no kernel that both enables SYSCALL and sets + * EFER.SCE, so it's #UD on all systems. But vm86 is + * buggy (or has a "feature"), so the SIGILL will actually + * be delivered. + */ + sethandler(SIGILL, sighandler, 0); + do_test(&v86, vmcode_syscall - vmcode, VM86_SIGNAL, 0, "SYSCALL"); + clearhandler(SIGILL); + + /* STI with VIP set */ + v86.regs.eflags |= X86_EFLAGS_VIP; + v86.regs.eflags &= ~X86_EFLAGS_IF; + do_test(&v86, vmcode_sti - vmcode, VM86_STI, 0, "STI with VIP set"); + + /* INT3 -- should cause #BP */ + do_test(&v86, vmcode_int3 - vmcode, VM86_TRAP, 3, "INT3"); + + /* INT80 -- should exit with "INTx 0x80" */ + v86.regs.eax = (unsigned int)-1; + do_test(&v86, vmcode_int80 - vmcode, VM86_INTx, 0x80, "int80"); + + /* Execute a null pointer */ + v86.regs.cs = 0; + v86.regs.ss = 0; + sethandler(SIGSEGV, sighandler, 0); + got_signal = 0; + do_test(&v86, 0, VM86_SIGNAL, 0, "Execute null pointer"); + if (!got_signal) { + printf("[FAIL]\tDid not receive SIGSEGV\n"); + nerrs++; + } + clearhandler(SIGSEGV); return (nerrs == 0 ? 0 : 1); } -- cgit v1.2.3 From 014dc90b66c8d0b5f5a9400440727c134ee5e5a3 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 30 Jul 2015 14:31:33 -0700 Subject: selftests/x86, x86/ldt: Add a selftest for modify_ldt() This tests general modify_ldt() behavior (only writes, so far) as well as synchronous updates via IPI. It fails on old kernels. I called this ldt_gdt because I'll add set_thread_area() tests to it at some point. Signed-off-by: Andy Lutomirski Reviewed-by: Kees Cook Cc: Andrew Cooper Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jan Beulich Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sasha Levin Cc: Steven Rostedt Cc: Thomas Gleixner Cc: security@kernel.org Cc: xen-devel Link: http://lkml.kernel.org/r/dcfda65dad07ff5a3ea97a9172b5963bf8031b2e.1438291540.git.luto@kernel.org Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/ldt_gdt.c | 576 ++++++++++++++++++++++++++++++++++ 2 files changed, 577 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/x86/ldt_gdt.c (limited to 'tools') diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index e8df47e6326c..b70da4acb4e5 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -4,7 +4,7 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean -TARGETS_C_BOTHBITS := sigreturn single_step_syscall sysret_ss_attrs +TARGETS_C_BOTHBITS := sigreturn single_step_syscall sysret_ss_attrs ldt_gdt TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c new file mode 100644 index 000000000000..31a3035cd4eb --- /dev/null +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -0,0 +1,576 @@ +/* + * ldt_gdt.c - Test cases for LDT and GDT access + * Copyright (c) 2015 Andrew Lutomirski + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define AR_ACCESSED (1<<8) + +#define AR_TYPE_RODATA (0 * (1<<9)) +#define AR_TYPE_RWDATA (1 * (1<<9)) +#define AR_TYPE_RODATA_EXPDOWN (2 * (1<<9)) +#define AR_TYPE_RWDATA_EXPDOWN (3 * (1<<9)) +#define AR_TYPE_XOCODE (4 * (1<<9)) +#define AR_TYPE_XRCODE (5 * (1<<9)) +#define AR_TYPE_XOCODE_CONF (6 * (1<<9)) +#define AR_TYPE_XRCODE_CONF (7 * (1<<9)) + +#define AR_DPL3 (3 * (1<<13)) + +#define AR_S (1 << 12) +#define AR_P (1 << 15) +#define AR_AVL (1 << 20) +#define AR_L (1 << 21) +#define AR_DB (1 << 22) +#define AR_G (1 << 23) + +static int nerrs; + +static void check_invalid_segment(uint16_t index, int ldt) +{ + uint32_t has_limit = 0, has_ar = 0, limit, ar; + uint32_t selector = (index << 3) | (ldt << 2) | 3; + + asm ("lsl %[selector], %[limit]\n\t" + "jnz 1f\n\t" + "movl $1, %[has_limit]\n\t" + "1:" + : [limit] "=r" (limit), [has_limit] "+rm" (has_limit) + : [selector] "r" (selector)); + asm ("larl %[selector], %[ar]\n\t" + "jnz 1f\n\t" + "movl $1, %[has_ar]\n\t" + "1:" + : [ar] "=r" (ar), [has_ar] "+rm" (has_ar) + : [selector] "r" (selector)); + + if (has_limit || has_ar) { + printf("[FAIL]\t%s entry %hu is valid but should be invalid\n", + (ldt ? "LDT" : "GDT"), index); + nerrs++; + } else { + printf("[OK]\t%s entry %hu is invalid\n", + (ldt ? "LDT" : "GDT"), index); + } +} + +static void check_valid_segment(uint16_t index, int ldt, + uint32_t expected_ar, uint32_t expected_limit, + bool verbose) +{ + uint32_t has_limit = 0, has_ar = 0, limit, ar; + uint32_t selector = (index << 3) | (ldt << 2) | 3; + + asm ("lsl %[selector], %[limit]\n\t" + "jnz 1f\n\t" + "movl $1, %[has_limit]\n\t" + "1:" + : [limit] "=r" (limit), [has_limit] "+rm" (has_limit) + : [selector] "r" (selector)); + asm ("larl %[selector], %[ar]\n\t" + "jnz 1f\n\t" + "movl $1, %[has_ar]\n\t" + "1:" + : [ar] "=r" (ar), [has_ar] "+rm" (has_ar) + : [selector] "r" (selector)); + + if (!has_limit || !has_ar) { + printf("[FAIL]\t%s entry %hu is invalid but should be valid\n", + (ldt ? "LDT" : "GDT"), index); + nerrs++; + return; + } + + if (ar != expected_ar) { + printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n", + (ldt ? "LDT" : "GDT"), index, ar, expected_ar); + nerrs++; + } else if (limit != expected_limit) { + printf("[FAIL]\t%s entry %hu has limit 0x%08X but expected 0x%08X\n", + (ldt ? "LDT" : "GDT"), index, limit, expected_limit); + nerrs++; + } else if (verbose) { + printf("[OK]\t%s entry %hu has AR 0x%08X and limit 0x%08X\n", + (ldt ? "LDT" : "GDT"), index, ar, limit); + } +} + +static bool install_valid_mode(const struct user_desc *desc, uint32_t ar, + bool oldmode) +{ + int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11, + desc, sizeof(*desc)); + if (ret < -1) + errno = -ret; + if (ret == 0) { + uint32_t limit = desc->limit; + if (desc->limit_in_pages) + limit = (limit << 12) + 4095; + check_valid_segment(desc->entry_number, 1, ar, limit, true); + return true; + } else if (errno == ENOSYS) { + printf("[OK]\tmodify_ldt returned -ENOSYS\n"); + return false; + } else { + if (desc->seg_32bit) { + printf("[FAIL]\tUnexpected modify_ldt failure %d\n", + errno); + nerrs++; + return false; + } else { + printf("[OK]\tmodify_ldt rejected 16 bit segment\n"); + return false; + } + } +} + +static bool install_valid(const struct user_desc *desc, uint32_t ar) +{ + return install_valid_mode(desc, ar, false); +} + +static void install_invalid(const struct user_desc *desc, bool oldmode) +{ + int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11, + desc, sizeof(*desc)); + if (ret < -1) + errno = -ret; + if (ret == 0) { + check_invalid_segment(desc->entry_number, 1); + } else if (errno == ENOSYS) { + printf("[OK]\tmodify_ldt returned -ENOSYS\n"); + } else { + if (desc->seg_32bit) { + printf("[FAIL]\tUnexpected modify_ldt failure %d\n", + errno); + nerrs++; + } else { + printf("[OK]\tmodify_ldt rejected 16 bit segment\n"); + } + } +} + +static int safe_modify_ldt(int func, struct user_desc *ptr, + unsigned long bytecount) +{ + int ret = syscall(SYS_modify_ldt, 0x11, ptr, bytecount); + if (ret < -1) + errno = -ret; + return ret; +} + +static void fail_install(struct user_desc *desc) +{ + if (safe_modify_ldt(0x11, desc, sizeof(*desc)) == 0) { + printf("[FAIL]\tmodify_ldt accepted a bad descriptor\n"); + nerrs++; + } else if (errno == ENOSYS) { + printf("[OK]\tmodify_ldt returned -ENOSYS\n"); + } else { + printf("[OK]\tmodify_ldt failure %d\n", errno); + } +} + +static void do_simple_tests(void) +{ + struct user_desc desc = { + .entry_number = 0, + .base_addr = 0, + .limit = 10, + .seg_32bit = 1, + .contents = 2, /* Code, not conforming */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 0, + .useable = 0 + }; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB); + + desc.limit_in_pages = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_P | AR_DB | AR_G); + + check_invalid_segment(1, 1); + + desc.entry_number = 2; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_P | AR_DB | AR_G); + + check_invalid_segment(1, 1); + + desc.base_addr = 0xf0000000; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_P | AR_DB | AR_G); + + desc.useable = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_P | AR_DB | AR_G | AR_AVL); + + desc.seg_not_present = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_DB | AR_G | AR_AVL); + + desc.seg_32bit = 0; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_G | AR_AVL); + + desc.seg_32bit = 1; + desc.contents = 0; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | + AR_S | AR_DB | AR_G | AR_AVL); + + desc.read_exec_only = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | + AR_S | AR_DB | AR_G | AR_AVL); + + desc.contents = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA_EXPDOWN | + AR_S | AR_DB | AR_G | AR_AVL); + + desc.read_exec_only = 0; + desc.limit_in_pages = 0; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA_EXPDOWN | + AR_S | AR_DB | AR_AVL); + + desc.contents = 3; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE_CONF | + AR_S | AR_DB | AR_AVL); + + desc.read_exec_only = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE_CONF | + AR_S | AR_DB | AR_AVL); + + desc.read_exec_only = 0; + desc.contents = 2; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_DB | AR_AVL); + + desc.read_exec_only = 1; + +#ifdef __x86_64__ + desc.lm = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE | + AR_S | AR_DB | AR_AVL); + desc.lm = 0; +#endif + + bool entry1_okay = install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE | + AR_S | AR_DB | AR_AVL); + + if (entry1_okay) { + printf("[RUN]\tTest fork\n"); + pid_t child = fork(); + if (child == 0) { + nerrs = 0; + check_valid_segment(desc.entry_number, 1, + AR_DPL3 | AR_TYPE_XOCODE | + AR_S | AR_DB | AR_AVL, desc.limit, + true); + check_invalid_segment(1, 1); + exit(nerrs ? 1 : 0); + } else { + int status; + if (waitpid(child, &status, 0) != child || + !WIFEXITED(status)) { + printf("[FAIL]\tChild died\n"); + nerrs++; + } else if (WEXITSTATUS(status) != 0) { + printf("[FAIL]\tChild failed\n"); + nerrs++; + } else { + printf("[OK]\tChild succeeded\n"); + } + } + + printf("[RUN]\tTest size\n"); + int i; + for (i = 0; i < 8192; i++) { + desc.entry_number = i; + desc.limit = i; + if (safe_modify_ldt(0x11, &desc, sizeof(desc)) != 0) { + printf("[FAIL]\tFailed to install entry %d\n", i); + nerrs++; + break; + } + } + for (int j = 0; j < i; j++) { + check_valid_segment(j, 1, AR_DPL3 | AR_TYPE_XOCODE | + AR_S | AR_DB | AR_AVL, j, false); + } + printf("[DONE]\tSize test\n"); + } else { + printf("[SKIP]\tSkipping fork and size tests because we have no LDT\n"); + } + + /* Test entry_number too high. */ + desc.entry_number = 8192; + fail_install(&desc); + + /* Test deletion and actions mistakeable for deletion. */ + memset(&desc, 0, sizeof(desc)); + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P); + + desc.seg_not_present = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S); + + desc.seg_not_present = 0; + desc.read_exec_only = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S | AR_P); + + desc.read_exec_only = 0; + desc.seg_not_present = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S); + + desc.read_exec_only = 1; + desc.limit = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S); + + desc.limit = 0; + desc.base_addr = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S); + + desc.base_addr = 0; + install_invalid(&desc, false); + + desc.seg_not_present = 0; + desc.read_exec_only = 0; + desc.seg_32bit = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB); + install_invalid(&desc, true); +} + +/* + * 0: thread is idle + * 1: thread armed + * 2: thread should clear LDT entry 0 + * 3: thread should exit + */ +static volatile unsigned int ftx; + +static void *threadproc(void *ctx) +{ + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(1, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) + err(1, "sched_setaffinity to CPU 1"); /* should never fail */ + + while (1) { + syscall(SYS_futex, &ftx, FUTEX_WAIT, 0, NULL, NULL, 0); + while (ftx != 2) { + if (ftx >= 3) + return NULL; + } + + /* clear LDT entry 0 */ + const struct user_desc desc = {}; + if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) != 0) + err(1, "modify_ldt"); + + /* If ftx == 2, set it to zero. If ftx == 100, quit. */ + unsigned int x = -2; + asm volatile ("lock xaddl %[x], %[ftx]" : + [x] "+r" (x), [ftx] "+m" (ftx)); + if (x != 2) + return NULL; + } +} + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); + +} + +static jmp_buf jmpbuf; + +static void sigsegv(int sig, siginfo_t *info, void *ctx_void) +{ + siglongjmp(jmpbuf, 1); +} + +static void do_multicpu_tests(void) +{ + cpu_set_t cpuset; + pthread_t thread; + int failures = 0, iters = 5, i; + unsigned short orig_ss; + + CPU_ZERO(&cpuset); + CPU_SET(1, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) { + printf("[SKIP]\tCannot set affinity to CPU 1\n"); + return; + } + + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) { + printf("[SKIP]\tCannot set affinity to CPU 0\n"); + return; + } + + sethandler(SIGSEGV, sigsegv, 0); +#ifdef __i386__ + /* True 32-bit kernels send SIGILL instead of SIGSEGV on IRET faults. */ + sethandler(SIGILL, sigsegv, 0); +#endif + + printf("[RUN]\tCross-CPU LDT invalidation\n"); + + if (pthread_create(&thread, 0, threadproc, 0) != 0) + err(1, "pthread_create"); + + asm volatile ("mov %%ss, %0" : "=rm" (orig_ss)); + + for (i = 0; i < 5; i++) { + if (sigsetjmp(jmpbuf, 1) != 0) + continue; + + /* Make sure the thread is ready after the last test. */ + while (ftx != 0) + ; + + struct user_desc desc = { + .entry_number = 0, + .base_addr = 0, + .limit = 0xfffff, + .seg_32bit = 1, + .contents = 0, /* Data */ + .read_exec_only = 0, + .limit_in_pages = 1, + .seg_not_present = 0, + .useable = 0 + }; + + if (safe_modify_ldt(0x11, &desc, sizeof(desc)) != 0) { + if (errno != ENOSYS) + err(1, "modify_ldt"); + printf("[SKIP]\tmodify_ldt unavailable\n"); + break; + } + + /* Arm the thread. */ + ftx = 1; + syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); + + asm volatile ("mov %0, %%ss" : : "r" (0x7)); + + /* Go! */ + ftx = 2; + + while (ftx != 0) + ; + + /* + * On success, modify_ldt will segfault us synchronously, + * and we'll escape via siglongjmp. + */ + + failures++; + asm volatile ("mov %0, %%ss" : : "rm" (orig_ss)); + }; + + ftx = 100; /* Kill the thread. */ + syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); + + if (pthread_join(thread, NULL) != 0) + err(1, "pthread_join"); + + if (failures) { + printf("[FAIL]\t%d of %d iterations failed\n", failures, iters); + nerrs++; + } else { + printf("[OK]\tAll %d iterations succeeded\n", iters); + } +} + +static int finish_exec_test(void) +{ + /* + * In a sensible world, this would be check_invalid_segment(0, 1); + * For better or for worse, though, the LDT is inherited across exec. + * We can probably change this safely, but for now we test it. + */ + check_valid_segment(0, 1, + AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB, + 42, true); + + return nerrs ? 1 : 0; +} + +static void do_exec_test(void) +{ + printf("[RUN]\tTest exec\n"); + + struct user_desc desc = { + .entry_number = 0, + .base_addr = 0, + .limit = 42, + .seg_32bit = 1, + .contents = 2, /* Code, not conforming */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 0, + .useable = 0 + }; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB); + + pid_t child = fork(); + if (child == 0) { + execl("/proc/self/exe", "ldt_gdt_test_exec", NULL); + printf("[FAIL]\tCould not exec self\n"); + exit(1); /* exec failed */ + } else { + int status; + if (waitpid(child, &status, 0) != child || + !WIFEXITED(status)) { + printf("[FAIL]\tChild died\n"); + nerrs++; + } else if (WEXITSTATUS(status) != 0) { + printf("[FAIL]\tChild failed\n"); + nerrs++; + } else { + printf("[OK]\tChild succeeded\n"); + } + } +} + +int main(int argc, char **argv) +{ + if (argc == 1 && !strcmp(argv[0], "ldt_gdt_test_exec")) + return finish_exec_test(); + + do_simple_tests(); + + do_multicpu_tests(); + + do_exec_test(); + + return nerrs ? 1 : 0; +} -- cgit v1.2.3 From e800eb39e3f586e46a2007f72d3b609f6e3b888d Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 29 Jul 2015 14:35:43 -0700 Subject: selftests/x86/vm86: Fix entry_from_vm86 test on 64-bit kernels The test failed due to an oversight on my part when run on a 64-bit kernel. vm86 isn't expected to work at all, and I mistakenly failed one part of the test because no signal was delivered. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Shuah Khan Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/502c8bef877b33fe4943885ded6125dfcc7892db.1438205722.git.luto@kernel.org Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/entry_from_vm86.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c index f004b2a09916..9a43a59a9bb4 100644 --- a/tools/testing/selftests/x86/entry_from_vm86.c +++ b/tools/testing/selftests/x86/entry_from_vm86.c @@ -105,7 +105,8 @@ extern unsigned char vmcode[], end_vmcode[]; extern unsigned char vmcode_bound[], vmcode_sysenter[], vmcode_syscall[], vmcode_sti[], vmcode_int3[], vmcode_int80[]; -static void do_test(struct vm86plus_struct *v86, unsigned long eip, +/* Returns false if the test was skipped. */ +static bool do_test(struct vm86plus_struct *v86, unsigned long eip, unsigned int rettype, unsigned int retarg, const char *text) { @@ -117,7 +118,7 @@ static void do_test(struct vm86plus_struct *v86, unsigned long eip, if (ret == -1 && errno == ENOSYS) { printf("[SKIP]\tvm86 not supported\n"); - return; + return false; } if (VM86_TYPE(ret) == VM86_INTx) { @@ -154,6 +155,8 @@ static void do_test(struct vm86plus_struct *v86, unsigned long eip, printf("[FAIL]\tIncorrect return reason\n"); nerrs++; } + + return true; } int main(void) @@ -219,8 +222,8 @@ int main(void) v86.regs.ss = 0; sethandler(SIGSEGV, sighandler, 0); got_signal = 0; - do_test(&v86, 0, VM86_SIGNAL, 0, "Execute null pointer"); - if (!got_signal) { + if (do_test(&v86, 0, VM86_SIGNAL, 0, "Execute null pointer") && + !got_signal) { printf("[FAIL]\tDid not receive SIGSEGV\n"); nerrs++; } -- cgit v1.2.3 From 33f3df41d03879ab86c7f2d650e67b655e0b85c8 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 17 Aug 2015 12:22:51 -0700 Subject: selftests/x86: Disable sigreturn_64 sigreturn_64 was broken by ed596cde9425 ("Revert x86 sigcontext cleanups"). Turn it off until we have a better fix. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Shuah Khan Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/a184e75ff170a0bcd76bf376c41cad2c402fe9f7.1439838962.git.luto@kernel.org Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index b70da4acb4e5..986e7cbf118f 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -4,8 +4,8 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean -TARGETS_C_BOTHBITS := sigreturn single_step_syscall sysret_ss_attrs ldt_gdt -TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault +TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs ldt_gdt +TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32) -- cgit v1.2.3 From a9c909ce8c7853b4fc16055c50eb50d91e20cb93 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 17 Aug 2015 12:22:52 -0700 Subject: selftests/x86: Add syscall_nt selftest I've had this sitting around for a while. Add it to the selftests tree. Far Cry running under Wine depends on this behavior. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Shuah Khan Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/ee4d63799a9e5294b70930618b71d04d2770eb2d.1439838962.git.luto@kernel.org Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/syscall_nt.c | 54 ++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/x86/syscall_nt.c (limited to 'tools') diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 986e7cbf118f..29089b24d18b 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -4,7 +4,7 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean -TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs ldt_gdt +TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs ldt_gdt syscall_nt TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) diff --git a/tools/testing/selftests/x86/syscall_nt.c b/tools/testing/selftests/x86/syscall_nt.c new file mode 100644 index 000000000000..60c06af4646a --- /dev/null +++ b/tools/testing/selftests/x86/syscall_nt.c @@ -0,0 +1,54 @@ +/* + * syscall_nt.c - checks syscalls with NT set + * Copyright (c) 2014-2015 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Some obscure user-space code requires the ability to make system calls + * with FLAGS.NT set. Make sure it works. + */ + +#include +#include +#include +#include + +#ifdef __x86_64__ +# define WIDTH "q" +#else +# define WIDTH "l" +#endif + +static unsigned long get_eflags(void) +{ + unsigned long eflags; + asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags)); + return eflags; +} + +static void set_eflags(unsigned long eflags) +{ + asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH + : : "rm" (eflags) : "flags"); +} + +int main() +{ + printf("[RUN]\tSet NT and issue a syscall\n"); + set_eflags(get_eflags() | X86_EFLAGS_NT); + syscall(SYS_getpid); + if (get_eflags() & X86_EFLAGS_NT) { + printf("[OK]\tThe syscall worked and NT is still set\n"); + return 0; + } else { + printf("[FAIL]\tThe syscall worked but NT was cleared\n"); + return 1; + } +} -- cgit v1.2.3