diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-13 10:05:52 +0900 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-13 10:05:52 +0900 |
commit | 4e21fc138bfd7fe625ff5dc81541399aaf9d429b (patch) | |
tree | 43bedf14d2eee7711b8241dcfd6bd7b8737d9bd5 | |
parent | 8418263e3547ed3816475e4c55a77004f0426ee6 (diff) | |
parent | 5522be6a4624a5f505555569e4d9cee946630686 (diff) | |
download | lwn-4e21fc138bfd7fe625ff5dc81541399aaf9d429b.tar.gz lwn-4e21fc138bfd7fe625ff5dc81541399aaf9d429b.zip |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/signal
Pull third pile of kernel_execve() patches from Al Viro:
"The last bits of infrastructure for kernel_thread() et.al., with
alpha/arm/x86 use of those. Plus sanitizing the asm glue and
do_notify_resume() on alpha, fixing the "disabled irq while running
task_work stuff" breakage there.
At that point the rest of kernel_thread/kernel_execve/sys_execve work
can be done independently for different architectures. The only
pending bits that do depend on having all architectures converted are
restrictred to fs/* and kernel/* - that'll obviously have to wait for
the next cycle.
I thought we'd have to wait for all of them done before we start
eliminating the longjump-style insanity in kernel_execve(), but it
turned out there's a very simple way to do that without flagday-style
changes."
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/signal:
alpha: switch to saner kernel_execve() semantics
arm: switch to saner kernel_execve() semantics
x86, um: convert to saner kernel_execve() semantics
infrastructure for saner ret_from_kernel_thread semantics
make sure that kernel_thread() callbacks call do_exit() themselves
make sure that we always have a return path from kernel_execve()
ppc: eeh_event should just use kthread_run()
don't bother with kernel_thread/kernel_execve for launching linuxrc
alpha: get rid of switch_stack argument of do_work_pending()
alpha: don't bother passing switch_stack separately from regs
alpha: take SIGPENDING/NOTIFY_RESUME loop into signal.c
alpha: simplify TIF_NEED_RESCHED handling
-rw-r--r-- | arch/Kconfig | 3 | ||||
-rw-r--r-- | arch/alpha/Kconfig | 1 | ||||
-rw-r--r-- | arch/alpha/include/asm/unistd.h | 1 | ||||
-rw-r--r-- | arch/alpha/kernel/entry.S | 87 | ||||
-rw-r--r-- | arch/alpha/kernel/signal.c | 48 | ||||
-rw-r--r-- | arch/arm/Kconfig | 1 | ||||
-rw-r--r-- | arch/arm/include/asm/unistd.h | 1 | ||||
-rw-r--r-- | arch/arm/kernel/entry-common.S | 29 | ||||
-rw-r--r-- | arch/arm/kernel/process.c | 5 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/eeh_event.c | 5 | ||||
-rw-r--r-- | arch/um/include/asm/processor-generic.h | 2 | ||||
-rw-r--r-- | arch/um/include/shared/os.h | 1 | ||||
-rw-r--r-- | arch/um/kernel/exec.c | 5 | ||||
-rw-r--r-- | arch/um/kernel/process.c | 10 | ||||
-rw-r--r-- | arch/um/os-Linux/process.c | 13 | ||||
-rw-r--r-- | arch/x86/Kconfig | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/unistd.h | 1 | ||||
-rw-r--r-- | arch/x86/kernel/entry_32.S | 31 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 24 | ||||
-rw-r--r-- | arch/x86/um/Kconfig | 1 | ||||
-rw-r--r-- | include/linux/syscalls.h | 8 | ||||
-rw-r--r-- | init/do_mounts_initrd.c | 41 | ||||
-rw-r--r-- | init/main.c | 33 | ||||
-rw-r--r-- | kernel/kmod.c | 7 | ||||
-rw-r--r-- | kernel/kthread.c | 1 |
25 files changed, 137 insertions, 223 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 26a28419cafc..a79a1ad8bb96 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -274,6 +274,9 @@ config ARCH_WANT_OLD_COMPAT_IPC config GENERIC_KERNEL_THREAD bool +config GENERIC_KERNEL_EXECVE + bool + config HAVE_ARCH_SECCOMP_FILTER bool help diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 7da91246e279..7a08cfb80ee8 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -21,6 +21,7 @@ config ALPHA select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select GENERIC_KERNEL_THREAD + select GENERIC_KERNEL_EXECVE help The Alpha is a 64-bit general-purpose processor designed and marketed by the Digital Equipment Corporation of blessed memory, diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index 3cb6c1188984..7826e227e4d0 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h @@ -482,7 +482,6 @@ #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_RT_SIGSUSPEND #define __ARCH_WANT_SYS_EXECVE -#define __ARCH_WANT_KERNEL_EXECVE /* "Conditional" syscalls. What we want is diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index 2a359c9ee3cd..a7607832dd4f 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -311,7 +311,7 @@ entSys: .align 4 ret_from_sys_call: - cmovne $26, 0, $19 /* $19 = 0 => non-restartable */ + cmovne $26, 0, $18 /* $18 = 0 => non-restartable */ ldq $0, SP_OFF($sp) and $0, 8, $0 beq $0, ret_to_kernel @@ -320,8 +320,8 @@ ret_to_user: sampling and the rti. */ lda $16, 7 call_pal PAL_swpipl - ldl $5, TI_FLAGS($8) - and $5, _TIF_WORK_MASK, $2 + ldl $17, TI_FLAGS($8) + and $17, _TIF_WORK_MASK, $2 bne $2, work_pending restore_all: RESTORE_ALL @@ -341,10 +341,10 @@ $syscall_error: * frame to indicate that a negative return value wasn't an * error number.. */ - ldq $19, 0($sp) /* old syscall nr (zero if success) */ - beq $19, $ret_success + ldq $18, 0($sp) /* old syscall nr (zero if success) */ + beq $18, $ret_success - ldq $20, 72($sp) /* .. and this a3 */ + ldq $19, 72($sp) /* .. and this a3 */ subq $31, $0, $0 /* with error in v0 */ addq $31, 1, $1 /* set a3 for errno return */ stq $0, 0($sp) @@ -362,51 +362,35 @@ $ret_success: * Do all cleanup when returning from all interrupts and system calls. * * Arguments: - * $5: TI_FLAGS. * $8: current. - * $19: The old syscall number, or zero if this is not a return + * $17: TI_FLAGS. + * $18: The old syscall number, or zero if this is not a return * from a syscall that errored and is possibly restartable. - * $20: The old a3 value + * $19: The old a3 value */ .align 4 .ent work_pending work_pending: - and $5, _TIF_NEED_RESCHED, $2 - beq $2, $work_notifysig + and $17, _TIF_NOTIFY_RESUME | _TIF_SIGPENDING, $2 + bne $2, $work_notifysig $work_resched: - subq $sp, 16, $sp - stq $19, 0($sp) /* save syscall nr */ - stq $20, 8($sp) /* and error indication (a3) */ + /* + * We can get here only if we returned from syscall without SIGPENDING + * or got through work_notifysig already. Either case means no syscall + * restarts for us, so let $18 and $19 burn. + */ jsr $26, schedule - ldq $19, 0($sp) - ldq $20, 8($sp) - addq $sp, 16, $sp - /* Make sure need_resched and sigpending don't change between - sampling and the rti. */ - lda $16, 7 - call_pal PAL_swpipl - ldl $5, TI_FLAGS($8) - and $5, _TIF_WORK_MASK, $2 - beq $2, restore_all - and $5, _TIF_NEED_RESCHED, $2 - bne $2, $work_resched + mov 0, $18 + br ret_to_user $work_notifysig: mov $sp, $16 bsr $1, do_switch_stack - mov $sp, $17 - mov $5, $18 - mov $19, $9 /* save old syscall number */ - mov $20, $10 /* save old a3 */ - and $5, _TIF_SIGPENDING, $2 - cmovne $2, 0, $9 /* we don't want double syscall restarts */ - jsr $26, do_notify_resume - mov $9, $19 - mov $10, $20 + jsr $26, do_work_pending bsr $1, undo_switch_stack - br ret_to_user + br restore_all .end work_pending /* @@ -454,9 +438,9 @@ $strace_success: .align 3 $strace_error: - ldq $19, 0($sp) /* old syscall nr (zero if success) */ - beq $19, $strace_success - ldq $20, 72($sp) /* .. and this a3 */ + ldq $18, 0($sp) /* old syscall nr (zero if success) */ + beq $18, $strace_success + ldq $19, 72($sp) /* .. and this a3 */ subq $31, $0, $0 /* with error in v0 */ addq $31, 1, $1 /* set a3 for errno return */ @@ -464,11 +448,11 @@ $strace_error: stq $1, 72($sp) /* a3 for return */ bsr $1, do_switch_stack - mov $19, $9 /* save old syscall number */ - mov $20, $10 /* save old a3 */ + mov $18, $9 /* save old syscall number */ + mov $19, $10 /* save old a3 */ jsr $26, syscall_trace_leave - mov $9, $19 - mov $10, $20 + mov $9, $18 + mov $10, $19 bsr $1, undo_switch_stack mov $31, $26 /* tell "ret_from_sys_call" we can restart */ @@ -619,24 +603,9 @@ ret_from_kernel_thread: mov $9, $27 mov $10, $16 jsr $26, ($9) - ldgp $gp, 0($26) - mov $0, $16 - mov $31, $26 - jmp $31, sys_exit -.end ret_from_kernel_thread - - .globl ret_from_kernel_execve - .align 4 - .ent ret_from_kernel_execve -ret_from_kernel_execve: - mov $16, $sp - /* Avoid the HAE being gratuitously wrong, to avoid restoring it. */ - ldq $2, alpha_mv+HAE_CACHE - stq $2, 152($sp) /* HAE */ mov $31, $19 /* to disable syscall restarts */ br $31, ret_to_user - -.end ret_from_kernel_execve +.end ret_from_kernel_thread /* diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index a8c97d42ec8e..32575f85507d 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -298,8 +298,9 @@ get_sigframe(struct k_sigaction *ka, unsigned long sp, size_t frame_size) static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, - struct switch_stack *sw, unsigned long mask, unsigned long sp) + unsigned long mask, unsigned long sp) { + struct switch_stack *sw = (struct switch_stack *)regs - 1; long i, err = 0; err |= __put_user(on_sig_stack((unsigned long)sc), &sc->sc_onstack); @@ -354,7 +355,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, static int setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, - struct pt_regs *regs, struct switch_stack * sw) + struct pt_regs *regs) { unsigned long oldsp, r26, err = 0; struct sigframe __user *frame; @@ -364,7 +365,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) return -EFAULT; - err |= setup_sigcontext(&frame->sc, regs, sw, set->sig[0], oldsp); + err |= setup_sigcontext(&frame->sc, regs, set->sig[0], oldsp); if (err) return -EFAULT; @@ -401,7 +402,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs *regs, struct switch_stack * sw) + sigset_t *set, struct pt_regs *regs) { unsigned long oldsp, r26, err = 0; struct rt_sigframe __user *frame; @@ -420,7 +421,7 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); err |= __put_user(sas_ss_flags(oldsp), &frame->uc.uc_stack.ss_flags); err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, sw, + err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], oldsp); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) @@ -464,15 +465,15 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, */ static inline void handle_signal(int sig, struct k_sigaction *ka, siginfo_t *info, - struct pt_regs * regs, struct switch_stack *sw) + struct pt_regs * regs) { sigset_t *oldset = sigmask_to_save(); int ret; if (ka->sa.sa_flags & SA_SIGINFO) - ret = setup_rt_frame(sig, ka, info, oldset, regs, sw); + ret = setup_rt_frame(sig, ka, info, oldset, regs); else - ret = setup_frame(sig, ka, oldset, regs, sw); + ret = setup_frame(sig, ka, oldset, regs); if (ret) { force_sigsegv(sig, current); @@ -519,8 +520,7 @@ syscall_restart(unsigned long r0, unsigned long r19, * all (if we get here from anything but a syscall return, it will be 0) */ static void -do_signal(struct pt_regs * regs, struct switch_stack * sw, - unsigned long r0, unsigned long r19) +do_signal(struct pt_regs *regs, unsigned long r0, unsigned long r19) { siginfo_t info; int signr; @@ -537,7 +537,7 @@ do_signal(struct pt_regs * regs, struct switch_stack * sw, /* Whee! Actually deliver the signal. */ if (r0) syscall_restart(r0, r19, regs, &ka); - handle_signal(signr, &ka, &info, regs, sw); + handle_signal(signr, &ka, &info, regs); if (single_stepping) ptrace_set_bpt(current); /* re-set bpt */ return; @@ -568,15 +568,23 @@ do_signal(struct pt_regs * regs, struct switch_stack * sw, } void -do_notify_resume(struct pt_regs *regs, struct switch_stack *sw, - unsigned long thread_info_flags, +do_work_pending(struct pt_regs *regs, unsigned long thread_flags, unsigned long r0, unsigned long r19) { - if (thread_info_flags & _TIF_SIGPENDING) - do_signal(regs, sw, r0, r19); - - if (thread_info_flags & _TIF_NOTIFY_RESUME) { - clear_thread_flag(TIF_NOTIFY_RESUME); - tracehook_notify_resume(regs); - } + do { + if (thread_flags & _TIF_NEED_RESCHED) { + schedule(); + } else { + local_irq_enable(); + if (thread_flags & _TIF_SIGPENDING) { + do_signal(regs, r0, r19); + r0 = 0; + } else { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } + } + local_irq_disable(); + thread_flags = current_thread_info()->flags; + } while (thread_flags & _TIF_WORK_MASK); } diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 767aae8277fa..431c3753145a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -53,6 +53,7 @@ config ARM select GENERIC_STRNLEN_USER select DCACHE_WORD_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && !CPU_BIG_ENDIAN select GENERIC_KERNEL_THREAD + select GENERIC_KERNEL_EXECVE help The ARM series is a line of low-power-consumption RISC chip designs licensed by ARM Ltd and targeted at embedded applications and diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index f259921edfe9..91819ad54424 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -479,7 +479,6 @@ #define __ARCH_WANT_SYS_SOCKETCALL #endif #define __ARCH_WANT_SYS_EXECVE -#define __ARCH_WANT_KERNEL_EXECVE /* * "Conditional" syscalls diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index e340fa1db203..417bac1846bd 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -86,35 +86,14 @@ ENDPROC(ret_to_user) */ ENTRY(ret_from_fork) bl schedule_tail + cmp r5, #0 + movne r0, r4 + movne lr, pc + movne pc, r5 get_thread_info tsk - mov why, #1 b ret_slow_syscall ENDPROC(ret_from_fork) -ENTRY(ret_from_kernel_thread) - UNWIND(.fnstart) - UNWIND(.cantunwind) - bl schedule_tail - mov r0, r4 - adr lr, BSYM(1f) @ kernel threads should not exit - mov pc, r5 -1: bl do_exit - nop - UNWIND(.fnend) -ENDPROC(ret_from_kernel_thread) - -/* - * turn a kernel thread into userland process - * use: ret_from_kernel_execve(struct pt_regs *normal) - */ -ENTRY(ret_from_kernel_execve) - mov why, #0 @ not a syscall - str why, [r0, #S_R0] @ ... and we want 0 in ->ARM_r0 as well - get_thread_info tsk @ thread structure - mov sp, r0 @ stack pointer just under pt_regs - b ret_slow_syscall -ENDPROC(ret_from_kernel_execve) - .equ NR_syscalls,0 #define CALL(x) .equ NR_syscalls,NR_syscalls+1 #include "calls.S" diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index f98c17ff1957..90084a6de35a 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -373,7 +373,6 @@ void release_thread(struct task_struct *dead_task) } asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); -asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread"); int copy_thread(unsigned long clone_flags, unsigned long stack_start, @@ -388,13 +387,13 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start, *childregs = *regs; childregs->ARM_r0 = 0; childregs->ARM_sp = stack_start; - thread->cpu_context.pc = (unsigned long)ret_from_fork; } else { + memset(childregs, 0, sizeof(struct pt_regs)); thread->cpu_context.r4 = stk_sz; thread->cpu_context.r5 = stack_start; - thread->cpu_context.pc = (unsigned long)ret_from_kernel_thread; childregs->ARM_cpsr = SVC_MODE; } + thread->cpu_context.pc = (unsigned long)ret_from_fork; thread->cpu_context.sp = (unsigned long)childregs; clear_ptrace_hw_breakpoint(p); diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c index 51faaac8abe6..185bedd926df 100644 --- a/arch/powerpc/platforms/pseries/eeh_event.c +++ b/arch/powerpc/platforms/pseries/eeh_event.c @@ -23,6 +23,7 @@ #include <linux/pci.h> #include <linux/slab.h> #include <linux/workqueue.h> +#include <linux/kthread.h> #include <asm/eeh_event.h> #include <asm/ppc-pci.h> @@ -59,8 +60,6 @@ static int eeh_event_handler(void * dummy) struct eeh_event *event; struct eeh_pe *pe; - set_task_comm(current, "eehd"); - spin_lock_irqsave(&eeh_eventlist_lock, flags); event = NULL; @@ -108,7 +107,7 @@ static int eeh_event_handler(void * dummy) */ static void eeh_thread_launcher(struct work_struct *dummy) { - if (kernel_thread(eeh_event_handler, NULL, CLONE_KERNEL) < 0) + if (IS_ERR(kthread_run(eeh_event_handler, NULL, "eehd"))) printk(KERN_ERR "Failed to start EEH daemon\n"); } diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index 1e82e954e978..c03cd5a02364 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -26,7 +26,6 @@ struct thread_struct { jmp_buf *fault_catcher; struct task_struct *prev_sched; unsigned long temp_stack; - jmp_buf *exec_buf; struct arch_thread arch; jmp_buf switch_buf; int mm_count; @@ -54,7 +53,6 @@ struct thread_struct { .fault_addr = NULL, \ .prev_sched = NULL, \ .temp_stack = 0, \ - .exec_buf = NULL, \ .arch = INIT_ARCH_THREAD, \ .request = { 0 } \ } diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index 44883049c11d..95feaa47a2fb 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -191,7 +191,6 @@ extern int os_getpid(void); extern int os_getpgrp(void); extern void init_new_thread_signals(void); -extern int run_kernel_thread(int (*fn)(void *), void *arg, jmp_buf **jmp_ptr); extern int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len, int r, int w, int x); diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index ab019c7f0b57..3a8ece7d09ca 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -47,8 +47,3 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) #endif } EXPORT_SYMBOL(start_thread); - -void __noreturn ret_from_kernel_execve(struct pt_regs *unused) -{ - UML_LONGJMP(current->thread.exec_buf, 1); -} diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 30629783b3e0..b6d699cdd557 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -135,14 +135,10 @@ void new_thread_handler(void) arg = current->thread.request.u.thread.arg; /* - * The return value is 1 if the kernel thread execs a process, - * 0 if it just exits + * callback returns only if the kernel thread execs a process */ - n = run_kernel_thread(fn, arg, ¤t->thread.exec_buf); - if (n == 1) - userspace(¤t->thread.regs.regs); - else - do_exit(0); + n = fn(arg); + userspace(¤t->thread.regs.regs); } /* Called magically, see new_thread_handler above */ diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index 162bea3d91b2..b8f34c9e53ae 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -244,16 +244,3 @@ void init_new_thread_signals(void) signal(SIGWINCH, SIG_IGN); signal(SIGTERM, SIG_DFL); } - -int run_kernel_thread(int (*fn)(void *), void *arg, jmp_buf **jmp_ptr) -{ - jmp_buf buf; - int n; - - *jmp_ptr = &buf; - n = UML_SETJMP(&buf); - if (n != 0) - return n; - (*fn)(arg); - return 0; -} diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 42d2c35a5bbd..70071b19eb98 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -109,6 +109,7 @@ config X86 select HAVE_RCU_USER_QS if X86_64 select HAVE_IRQ_TIME_ACCOUNTING select GENERIC_KERNEL_THREAD + select GENERIC_KERNEL_EXECVE config INSTRUCTION_DECODER def_bool y diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 55d155560fdf..16f3fc6ebf2e 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -51,7 +51,6 @@ # define __ARCH_WANT_SYS_UTIME # define __ARCH_WANT_SYS_WAITPID # define __ARCH_WANT_SYS_EXECVE -# define __ARCH_WANT_KERNEL_EXECVE /* * "Conditional" syscalls diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 2c6340796fe9..a1193aef6d7d 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -299,12 +299,20 @@ ENTRY(ret_from_fork) CFI_ENDPROC END(ret_from_fork) -ENTRY(ret_from_kernel_execve) - movl %eax, %esp - movl $0,PT_EAX(%esp) +ENTRY(ret_from_kernel_thread) + CFI_STARTPROC + pushl_cfi %eax + call schedule_tail GET_THREAD_INFO(%ebp) + popl_cfi %eax + pushl_cfi $0x0202 # Reset kernel eflags + popfl_cfi + movl PT_EBP(%esp),%eax + call *PT_EBX(%esp) + movl $0,PT_EAX(%esp) jmp syscall_exit -END(ret_from_kernel_execve) + CFI_ENDPROC +ENDPROC(ret_from_kernel_thread) /* * Interrupt exit functions should be protected against kprobes @@ -1015,21 +1023,6 @@ END(spurious_interrupt_bug) */ .popsection -ENTRY(ret_from_kernel_thread) - CFI_STARTPROC - pushl_cfi %eax - call schedule_tail - GET_THREAD_INFO(%ebp) - popl_cfi %eax - pushl_cfi $0x0202 # Reset kernel eflags - popfl_cfi - movl PT_EBP(%esp),%eax - call *PT_EBX(%esp) - call do_exit - ud2 # padding for call trace - CFI_ENDPROC -ENDPROC(ret_from_kernel_thread) - #ifdef CONFIG_XEN /* Xen doesn't set %esp to be precisely what the normal sysenter entrypoint expects, so fix it up before using the normal path. */ diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index cdc790c78f32..0c58952d64e8 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -563,15 +563,13 @@ ENTRY(ret_from_fork) jmp ret_from_sys_call # go to the SYSRET fastpath 1: - subq $REST_SKIP, %rsp # move the stack pointer back + subq $REST_SKIP, %rsp # leave space for volatiles CFI_ADJUST_CFA_OFFSET REST_SKIP movq %rbp, %rdi call *%rbx - # exit - mov %eax, %edi - call do_exit - ud2 # padding for call trace - + movl $0, RAX(%rsp) + RESTORE_REST + jmp int_ret_from_sys_call CFI_ENDPROC END(ret_from_fork) @@ -1326,20 +1324,6 @@ bad_gs: jmp 2b .previous -ENTRY(ret_from_kernel_execve) - movq %rdi, %rsp - movl $0, RAX(%rsp) - // RESTORE_REST - movq 0*8(%rsp), %r15 - movq 1*8(%rsp), %r14 - movq 2*8(%rsp), %r13 - movq 3*8(%rsp), %r12 - movq 4*8(%rsp), %rbp - movq 5*8(%rsp), %rbx - addq $(6*8), %rsp - jmp int_ret_from_sys_call -END(ret_from_kernel_execve) - /* Call softirq on interrupt stack. Interrupts are off. */ ENTRY(call_softirq) CFI_STARTPROC diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 30c4eec033af..9fa950df80e5 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -14,6 +14,7 @@ config UML_X86 def_bool y select GENERIC_FIND_FIRST_BIT select GENERIC_KERNEL_THREAD + select GENERIC_KERNEL_EXECVE config 64BIT bool "64-bit kernel" if SUBARCH = "x86" diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 19439c75c5b2..727f0cd73921 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -827,7 +827,15 @@ asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, const char __user *pathname); asmlinkage long sys_syncfs(int fd); +#ifndef CONFIG_GENERIC_KERNEL_EXECVE int kernel_execve(const char *filename, const char *const argv[], const char *const envp[]); +#else +#define kernel_execve(filename, argv, envp) \ + do_execve(filename, \ + (const char __user *const __user *)argv, \ + (const char __user *const __user *)envp, \ + current_pt_regs()) +#endif asmlinkage long sys_perf_event_open( diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 135959a276be..5e4ded51788e 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -16,13 +16,13 @@ #include <linux/initrd.h> #include <linux/sched.h> #include <linux/freezer.h> +#include <linux/kmod.h> #include "do_mounts.h" unsigned long initrd_start, initrd_end; int initrd_below_start_ok; unsigned int real_root_dev; /* do_proc_dointvec cannot handle kdev_t */ -static int __initdata old_fd, root_fd; static int __initdata mount_initrd = 1; static int __init no_initrd(char *str) @@ -33,33 +33,29 @@ static int __init no_initrd(char *str) __setup("noinitrd", no_initrd); -static int __init do_linuxrc(void *_shell) +static int init_linuxrc(struct subprocess_info *info, struct cred *new) { - static const char *argv[] = { "linuxrc", NULL, }; - extern const char *envp_init[]; - const char *shell = _shell; - - sys_close(old_fd);sys_close(root_fd); + sys_unshare(CLONE_FS | CLONE_FILES); + /* move initrd over / and chdir/chroot in initrd root */ + sys_chdir("/root"); + sys_mount(".", "/", NULL, MS_MOVE, NULL); + sys_chroot("."); sys_setsid(); - return kernel_execve(shell, argv, envp_init); + return 0; } static void __init handle_initrd(void) { + static char *argv[] = { "linuxrc", NULL, }; + extern char *envp_init[]; int error; - int pid; real_root_dev = new_encode_dev(ROOT_DEV); create_dev("/dev/root.old", Root_RAM0); /* mount initrd on rootfs' /root */ mount_block_root("/dev/root.old", root_mountflags & ~MS_RDONLY); sys_mkdir("/old", 0700); - root_fd = sys_open("/", 0, 0); - old_fd = sys_open("/old", 0, 0); - /* move initrd over / and chdir/chroot in initrd root */ - sys_chdir("/root"); - sys_mount(".", "/", NULL, MS_MOVE, NULL); - sys_chroot("."); + sys_chdir("/old"); /* * In case that a resume from disk is carried out by linuxrc or one of @@ -67,27 +63,22 @@ static void __init handle_initrd(void) */ current->flags |= PF_FREEZER_SKIP; - pid = kernel_thread(do_linuxrc, "/linuxrc", SIGCHLD); - if (pid > 0) - while (pid != sys_wait4(-1, NULL, 0, NULL)) - yield(); + call_usermodehelper_fns("/linuxrc", argv, envp_init, UMH_WAIT_PROC, + init_linuxrc, NULL, NULL); current->flags &= ~PF_FREEZER_SKIP; /* move initrd to rootfs' /old */ - sys_fchdir(old_fd); - sys_mount("/", ".", NULL, MS_MOVE, NULL); + sys_mount("..", ".", NULL, MS_MOVE, NULL); /* switch root and cwd back to / of rootfs */ - sys_fchdir(root_fd); - sys_chroot("."); - sys_close(old_fd); - sys_close(root_fd); + sys_chroot(".."); if (new_decode_dev(real_root_dev) == Root_RAM0) { sys_chdir("/old"); return; } + sys_chdir("/"); ROOT_DEV = new_decode_dev(real_root_dev); mount_root(); diff --git a/init/main.c b/init/main.c index 313360fe1118..9cf77ab138a6 100644 --- a/init/main.c +++ b/init/main.c @@ -69,6 +69,7 @@ #include <linux/slab.h> #include <linux/perf_event.h> #include <linux/file.h> +#include <linux/ptrace.h> #include <asm/io.h> #include <asm/bugs.h> @@ -791,17 +792,17 @@ static void __init do_pre_smp_initcalls(void) do_one_initcall(*fn); } -static void run_init_process(const char *init_filename) +static int run_init_process(const char *init_filename) { argv_init[0] = init_filename; - kernel_execve(init_filename, argv_init, envp_init); + return kernel_execve(init_filename, argv_init, envp_init); } -/* This is a non __init function. Force it to be noinline otherwise gcc - * makes it inline to init() and it becomes part of init.text section - */ -static noinline int init_post(void) +static void __init kernel_init_freeable(void); + +static int __ref kernel_init(void *unused) { + kernel_init_freeable(); /* need to finish all async __init code before freeing the memory */ async_synchronize_full(); free_initmem(); @@ -813,7 +814,8 @@ static noinline int init_post(void) flush_delayed_fput(); if (ramdisk_execute_command) { - run_init_process(ramdisk_execute_command); + if (!run_init_process(ramdisk_execute_command)) + return 0; printk(KERN_WARNING "Failed to execute %s\n", ramdisk_execute_command); } @@ -825,20 +827,22 @@ static noinline int init_post(void) * trying to recover a really broken machine. */ if (execute_command) { - run_init_process(execute_command); + if (!run_init_process(execute_command)) + return 0; printk(KERN_WARNING "Failed to execute %s. Attempting " "defaults...\n", execute_command); } - run_init_process("/sbin/init"); - run_init_process("/etc/init"); - run_init_process("/bin/init"); - run_init_process("/bin/sh"); + if (!run_init_process("/sbin/init") || + !run_init_process("/etc/init") || + !run_init_process("/bin/init") || + !run_init_process("/bin/sh")) + return 0; panic("No init found. Try passing init= option to kernel. " "See Linux Documentation/init.txt for guidance."); } -static int __init kernel_init(void * unused) +static void __init kernel_init_freeable(void) { /* * Wait until kthreadd is all set-up. @@ -893,7 +897,4 @@ static int __init kernel_init(void * unused) * we're essentially up and running. Get rid of the * initmem segments and start the user-mode stuff.. */ - - init_post(); - return 0; } diff --git a/kernel/kmod.c b/kernel/kmod.c index 6f99aead66c6..1c317e386831 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -37,6 +37,7 @@ #include <linux/notifier.h> #include <linux/suspend.h> #include <linux/rwsem.h> +#include <linux/ptrace.h> #include <asm/uaccess.h> #include <trace/events/module.h> @@ -221,11 +222,13 @@ static int ____call_usermodehelper(void *data) retval = kernel_execve(sub_info->path, (const char *const *)sub_info->argv, (const char *const *)sub_info->envp); + if (!retval) + return 0; /* Exec failed? */ fail: sub_info->retval = retval; - return 0; + do_exit(0); } static int call_helper(void *data) @@ -292,7 +295,7 @@ static int wait_for_helper(void *data) } umh_complete(sub_info); - return 0; + do_exit(0); } /* This is run by khelper thread */ diff --git a/kernel/kthread.c b/kernel/kthread.c index 146a6fa96825..29fb60caecb5 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -16,6 +16,7 @@ #include <linux/mutex.h> #include <linux/slab.h> #include <linux/freezer.h> +#include <linux/ptrace.h> #include <trace/events/sched.h> static DEFINE_SPINLOCK(kthread_create_lock); |