From 0d0970eef3b03ef08b19da5bc3044410731cf38f Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 20 Sep 2017 16:24:32 -0500 Subject: objtool: Handle another GCC stack pointer adjustment bug The kbuild bot reported the following warning with GCC 4.4 and a randconfig: net/socket.o: warning: objtool: compat_sock_ioctl()+0x1083: stack state mismatch: cfa1=7+160 cfa2=-1+0 This is caused by another GCC non-optimization, where it backs up and restores the stack pointer for no apparent reason: 2f91: 48 89 e0 mov %rsp,%rax 2f94: 4c 89 e7 mov %r12,%rdi 2f97: 4c 89 f6 mov %r14,%rsi 2f9a: ba 20 00 00 00 mov $0x20,%edx 2f9f: 48 89 c4 mov %rax,%rsp This issue would have been happily ignored before the following commit: dd88a0a0c861 ("objtool: Handle GCC stack pointer adjustment bug") But now that objtool is paying attention to such stack pointer writes to/from a register, it needs to understand them properly. In this case that means recognizing that the "mov %rsp, %rax" instruction is potentially a backup of the stack pointer. Reported-by: kbuild test robot Signed-off-by: Josh Poimboeuf Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dmitriy Vyukov Cc: Linus Torvalds Cc: Matthias Kaehlcke Cc: Miguel Bernal Marin Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: dd88a0a0c861 ("objtool: Handle GCC stack pointer adjustment bug") Link: http://lkml.kernel.org/r/8c7aa8e9a36fbbb6655d9d8e7cea58958c912da8.1505942196.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- tools/objtool/arch/x86/decode.c | 6 +++--- tools/objtool/check.c | 43 +++++++++++++++++++++++++++-------------- 2 files changed, 32 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 0e8c8ec4fd4e..0f22768c0d4d 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -208,14 +208,14 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, break; case 0x89: - if (rex == 0x48 && modrm == 0xe5) { + if (rex_w && !rex_r && modrm_mod == 3 && modrm_reg == 4) { - /* mov %rsp, %rbp */ + /* mov %rsp, reg */ *type = INSN_STACK; op->src.type = OP_SRC_REG; op->src.reg = CFI_SP; op->dest.type = OP_DEST_REG; - op->dest.reg = CFI_BP; + op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b]; break; } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index f744617c9946..a0c518ecf085 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1203,24 +1203,39 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state) switch (op->src.type) { case OP_SRC_REG: - if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP) { + if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP && + cfa->base == CFI_SP && + regs[CFI_BP].base == CFI_CFA && + regs[CFI_BP].offset == -cfa->offset) { + + /* mov %rsp, %rbp */ + cfa->base = op->dest.reg; + state->bp_scratch = false; + } - if (cfa->base == CFI_SP && - regs[CFI_BP].base == CFI_CFA && - regs[CFI_BP].offset == -cfa->offset) { + else if (op->src.reg == CFI_SP && + op->dest.reg == CFI_BP && state->drap) { - /* mov %rsp, %rbp */ - cfa->base = op->dest.reg; - state->bp_scratch = false; - } + /* drap: mov %rsp, %rbp */ + regs[CFI_BP].base = CFI_BP; + regs[CFI_BP].offset = -state->stack_size; + state->bp_scratch = false; + } - else if (state->drap) { + else if (op->src.reg == CFI_SP && cfa->base == CFI_SP) { - /* drap: mov %rsp, %rbp */ - regs[CFI_BP].base = CFI_BP; - regs[CFI_BP].offset = -state->stack_size; - state->bp_scratch = false; - } + /* + * mov %rsp, %reg + * + * This is needed for the rare case where GCC + * does: + * + * mov %rsp, %rax + * ... + * mov %rax, %rsp + */ + state->vals[op->dest.reg].base = CFI_CFA; + state->vals[op->dest.reg].offset = -state->stack_size; } else if (op->dest.reg == cfa->base) { -- cgit v1.2.3 From f5caf621ee357279e759c0911daf6d55c7d36f03 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 20 Sep 2017 16:24:33 -0500 Subject: x86/asm: Fix inline asm call constraints for Clang For inline asm statements which have a CALL instruction, we list the stack pointer as a constraint to convince GCC to ensure the frame pointer is set up first: static inline void foo() { register void *__sp asm(_ASM_SP); asm("call bar" : "+r" (__sp)) } Unfortunately, that pattern causes Clang to corrupt the stack pointer. The fix is easy: convert the stack pointer register variable to a global variable. It should be noted that the end result is different based on the GCC version. With GCC 6.4, this patch has exactly the same result as before: defconfig defconfig-nofp distro distro-nofp before 9820389 9491555 8816046 8516940 after 9820389 9491555 8816046 8516940 With GCC 7.2, however, GCC's behavior has changed. It now changes its behavior based on the conversion of the register variable to a global. That somehow convinces it to *always* set up the frame pointer before inserting *any* inline asm. (Therefore, listing the variable as an output constraint is a no-op and is no longer necessary.) It's a bit overkill, but the performance impact should be negligible. And in fact, there's a nice improvement with frame pointers disabled: defconfig defconfig-nofp distro distro-nofp before 9796316 9468236 9076191 8790305 after 9796957 9464267 9076381 8785949 So in summary, while listing the stack pointer as an output constraint is no longer necessary for newer versions of GCC, it's still needed for older versions. Suggested-by: Andrey Ryabinin Reported-by: Matthias Kaehlcke Signed-off-by: Josh Poimboeuf Cc: Alexander Potapenko Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Dmitriy Vyukov Cc: Linus Torvalds Cc: Miguel Bernal Marin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/3db862e970c432ae823cf515c52b54fec8270e0e.1505942196.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/alternative.h | 3 +-- arch/x86/include/asm/asm.h | 11 +++++++++++ arch/x86/include/asm/mshyperv.h | 10 ++++------ arch/x86/include/asm/paravirt_types.h | 14 +++++++------- arch/x86/include/asm/preempt.h | 15 +++++---------- arch/x86/include/asm/processor.h | 6 ++---- arch/x86/include/asm/rwsem.h | 4 ++-- arch/x86/include/asm/uaccess.h | 4 ++-- arch/x86/include/asm/xen/hypercall.h | 5 ++--- arch/x86/kvm/emulate.c | 3 +-- arch/x86/kvm/vmx.c | 3 +-- arch/x86/mm/fault.c | 3 +-- tools/objtool/Documentation/stack-validation.txt | 6 +++--- 13 files changed, 42 insertions(+), 45 deletions(-) (limited to 'tools') diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 1b020381ab38..c096624137ae 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -218,10 +218,9 @@ static inline int alternatives_text_reserved(void *start, void *end) #define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \ output, input...) \ { \ - register void *__sp asm(_ASM_SP); \ asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\ "call %P[new2]", feature2) \ - : output, "+r" (__sp) \ + : output, ASM_CALL_CONSTRAINT \ : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ [new2] "i" (newfunc2), ## input); \ } diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 676ee5807d86..c1eadbaf1115 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -132,4 +132,15 @@ /* For C file, we already have NOKPROBE_SYMBOL macro */ #endif +#ifndef __ASSEMBLY__ +/* + * This output constraint should be used for any inline asm which has a "call" + * instruction. Otherwise the asm may be inserted before the frame pointer + * gets set up by the containing function. If you forget to do this, objtool + * may print a "call without frame pointer save/setup" warning. + */ +register unsigned int __asm_call_sp asm("esp"); +#define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp) +#endif + #endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 63cc96f064dc..738503e1f80c 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -179,7 +179,6 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) u64 input_address = input ? virt_to_phys(input) : 0; u64 output_address = output ? virt_to_phys(output) : 0; u64 hv_status; - register void *__sp asm(_ASM_SP); #ifdef CONFIG_X86_64 if (!hv_hypercall_pg) @@ -187,7 +186,7 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) __asm__ __volatile__("mov %4, %%r8\n" "call *%5" - : "=a" (hv_status), "+r" (__sp), + : "=a" (hv_status), ASM_CALL_CONSTRAINT, "+c" (control), "+d" (input_address) : "r" (output_address), "m" (hv_hypercall_pg) : "cc", "memory", "r8", "r9", "r10", "r11"); @@ -202,7 +201,7 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) __asm__ __volatile__("call *%7" : "=A" (hv_status), - "+c" (input_address_lo), "+r" (__sp) + "+c" (input_address_lo), ASM_CALL_CONSTRAINT : "A" (control), "b" (input_address_hi), "D"(output_address_hi), "S"(output_address_lo), @@ -224,12 +223,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) { u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT; - register void *__sp asm(_ASM_SP); #ifdef CONFIG_X86_64 { __asm__ __volatile__("call *%4" - : "=a" (hv_status), "+r" (__sp), + : "=a" (hv_status), ASM_CALL_CONSTRAINT, "+c" (control), "+d" (input1) : "m" (hv_hypercall_pg) : "cc", "r8", "r9", "r10", "r11"); @@ -242,7 +240,7 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) __asm__ __volatile__ ("call *%5" : "=A"(hv_status), "+c"(input1_lo), - "+r"(__sp) + ASM_CALL_CONSTRAINT : "A" (control), "b" (input1_hi), "m" (hv_hypercall_pg) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 42873edd9f9d..280d94c36dad 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -459,8 +459,8 @@ int paravirt_disable_iospace(void); */ #ifdef CONFIG_X86_32 #define PVOP_VCALL_ARGS \ - unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx; \ - register void *__sp asm("esp") + unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx; + #define PVOP_CALL_ARGS PVOP_VCALL_ARGS #define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x)) @@ -480,8 +480,8 @@ int paravirt_disable_iospace(void); /* [re]ax isn't an arg, but the return val */ #define PVOP_VCALL_ARGS \ unsigned long __edi = __edi, __esi = __esi, \ - __edx = __edx, __ecx = __ecx, __eax = __eax; \ - register void *__sp asm("rsp") + __edx = __edx, __ecx = __ecx, __eax = __eax; + #define PVOP_CALL_ARGS PVOP_VCALL_ARGS #define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) @@ -532,7 +532,7 @@ int paravirt_disable_iospace(void); asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : call_clbr, "+r" (__sp) \ + : call_clbr, ASM_CALL_CONSTRAINT \ : paravirt_type(op), \ paravirt_clobber(clbr), \ ##__VA_ARGS__ \ @@ -542,7 +542,7 @@ int paravirt_disable_iospace(void); asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : call_clbr, "+r" (__sp) \ + : call_clbr, ASM_CALL_CONSTRAINT \ : paravirt_type(op), \ paravirt_clobber(clbr), \ ##__VA_ARGS__ \ @@ -569,7 +569,7 @@ int paravirt_disable_iospace(void); asm volatile(pre \ paravirt_alt(PARAVIRT_CALL) \ post \ - : call_clbr, "+r" (__sp) \ + : call_clbr, ASM_CALL_CONSTRAINT \ : paravirt_type(op), \ paravirt_clobber(clbr), \ ##__VA_ARGS__ \ diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index ec1f3c651150..4f44505dbf87 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -100,19 +100,14 @@ static __always_inline bool should_resched(int preempt_offset) #ifdef CONFIG_PREEMPT extern asmlinkage void ___preempt_schedule(void); -# define __preempt_schedule() \ -({ \ - register void *__sp asm(_ASM_SP); \ - asm volatile ("call ___preempt_schedule" : "+r"(__sp)); \ -}) +# define __preempt_schedule() \ + asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT) extern asmlinkage void preempt_schedule(void); extern asmlinkage void ___preempt_schedule_notrace(void); -# define __preempt_schedule_notrace() \ -({ \ - register void *__sp asm(_ASM_SP); \ - asm volatile ("call ___preempt_schedule_notrace" : "+r"(__sp)); \ -}) +# define __preempt_schedule_notrace() \ + asm volatile ("call ___preempt_schedule_notrace" : ASM_CALL_CONSTRAINT) + extern asmlinkage void preempt_schedule_notrace(void); #endif diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 3fa26a61eabc..b390ff76e58f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -677,8 +677,6 @@ static inline void sync_core(void) * Like all of Linux's memory ordering operations, this is a * compiler barrier as well. */ - register void *__sp asm(_ASM_SP); - #ifdef CONFIG_X86_32 asm volatile ( "pushfl\n\t" @@ -686,7 +684,7 @@ static inline void sync_core(void) "pushl $1f\n\t" "iret\n\t" "1:" - : "+r" (__sp) : : "memory"); + : ASM_CALL_CONSTRAINT : : "memory"); #else unsigned int tmp; @@ -703,7 +701,7 @@ static inline void sync_core(void) "iretq\n\t" UNWIND_HINT_RESTORE "1:" - : "=&r" (tmp), "+r" (__sp) : : "cc", "memory"); + : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory"); #endif } diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index a34e0d4b957d..7116b7931c7b 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -103,7 +103,6 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem) ({ \ long tmp; \ struct rw_semaphore* ret; \ - register void *__sp asm(_ASM_SP); \ \ asm volatile("# beginning down_write\n\t" \ LOCK_PREFIX " xadd %1,(%4)\n\t" \ @@ -114,7 +113,8 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem) " call " slow_path "\n" \ "1:\n" \ "# ending down_write" \ - : "+m" (sem->count), "=d" (tmp), "=a" (ret), "+r" (__sp) \ + : "+m" (sem->count), "=d" (tmp), \ + "=a" (ret), ASM_CALL_CONSTRAINT \ : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \ : "memory", "cc"); \ ret; \ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 184eb9894dae..78e8fcc87d4c 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -166,11 +166,11 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) ({ \ int __ret_gu; \ register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \ - register void *__sp asm(_ASM_SP); \ __chk_user_ptr(ptr); \ might_fault(); \ asm volatile("call __get_user_%P4" \ - : "=a" (__ret_gu), "=r" (__val_gu), "+r" (__sp) \ + : "=a" (__ret_gu), "=r" (__val_gu), \ + ASM_CALL_CONSTRAINT \ : "0" (ptr), "i" (sizeof(*(ptr)))); \ (x) = (__force __typeof__(*(ptr))) __val_gu; \ __builtin_expect(__ret_gu, 0); \ diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 9606688caa4b..128a1a0b1450 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -113,10 +113,9 @@ extern struct { char _entry[32]; } hypercall_page[]; register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \ register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \ register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \ - register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5; \ - register void *__sp asm(_ASM_SP); + register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5; -#define __HYPERCALL_0PARAM "=r" (__res), "+r" (__sp) +#define __HYPERCALL_0PARAM "=r" (__res), ASM_CALL_CONSTRAINT #define __HYPERCALL_1PARAM __HYPERCALL_0PARAM, "+r" (__arg1) #define __HYPERCALL_2PARAM __HYPERCALL_1PARAM, "+r" (__arg2) #define __HYPERCALL_3PARAM __HYPERCALL_2PARAM, "+r" (__arg3) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 16bf6655aa85..f23f13403f33 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -5296,7 +5296,6 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) { - register void *__sp asm(_ASM_SP); ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; if (!(ctxt->d & ByteOp)) @@ -5304,7 +5303,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags), - [fastop]"+S"(fop), "+r"(__sp) + [fastop]"+S"(fop), ASM_CALL_CONSTRAINT : "c"(ctxt->src2.val)); ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 06c0c6d0541e..6ee237f509dc 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9036,7 +9036,6 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) { u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - register void *__sp asm(_ASM_SP); if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK)) == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) { @@ -9065,7 +9064,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) #ifdef CONFIG_X86_64 [sp]"=&r"(tmp), #endif - "+r"(__sp) + ASM_CALL_CONSTRAINT : [entry]"r"(entry), [ss]"i"(__KERNEL_DS), diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index b836a7274e12..39567b5c33da 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -806,7 +806,6 @@ no_context(struct pt_regs *regs, unsigned long error_code, if (is_vmalloc_addr((void *)address) && (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) || address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) { - register void *__sp asm("rsp"); unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *); /* * We're likely to be running with very little stack space @@ -821,7 +820,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, asm volatile ("movq %[stack], %%rsp\n\t" "call handle_stack_overflow\n\t" "1: jmp 1b" - : "+r" (__sp) + : ASM_CALL_CONSTRAINT : "D" ("kernel stack overflow (page fault)"), "S" (regs), "d" (address), [stack] "rm" (stack)); diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt index 6a1af43862df..3995735a878f 100644 --- a/tools/objtool/Documentation/stack-validation.txt +++ b/tools/objtool/Documentation/stack-validation.txt @@ -194,10 +194,10 @@ they mean, and suggestions for how to fix them. If it's a GCC-compiled .c file, the error may be because the function uses an inline asm() statement which has a "call" instruction. An asm() statement with a call instruction must declare the use of the - stack pointer in its output operand. For example, on x86_64: + stack pointer in its output operand. On x86_64, this means adding + the ASM_CALL_CONSTRAINT as an output constraint: - register void *__sp asm("rsp"); - asm volatile("call func" : "+r" (__sp)); + asm volatile("call func" : ASM_CALL_CONSTRAINT); Otherwise the stack frame may not get created before the call. -- cgit v1.2.3