diff options
author | Michal Simek <monstr@monstr.eu> | 2009-05-26 16:30:13 +0200 |
---|---|---|
committer | Michal Simek <monstr@monstr.eu> | 2009-05-26 16:45:15 +0200 |
commit | 5de96121009f4de43ffeb7160109e23132278c07 (patch) | |
tree | 6d8418195f2a68e185ef993471a6e8ffd8bcb613 | |
parent | 23098649e0f8861ea69fac62cf6ba721b83065dc (diff) | |
download | lwn-5de96121009f4de43ffeb7160109e23132278c07.tar.gz lwn-5de96121009f4de43ffeb7160109e23132278c07.zip |
microblaze_mmu_v2: Page fault handling high level - fault.c
Signed-off-by: Michal Simek <monstr@monstr.eu>
-rw-r--r-- | arch/microblaze/mm/fault.c | 304 |
1 files changed, 304 insertions, 0 deletions
diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c new file mode 100644 index 000000000000..5e67cd1fab40 --- /dev/null +++ b/arch/microblaze/mm/fault.c @@ -0,0 +1,304 @@ +/* + * arch/microblaze/mm/fault.c + * + * Copyright (C) 2007 Xilinx, Inc. All rights reserved. + * + * Derived from "arch/ppc/mm/fault.c" + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Derived from "arch/i386/mm/fault.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Modified by Cort Dougan and Paul Mackerras. + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file COPYING in the main directory of this + * archive for more details. + * + */ + +#include <linux/module.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/interrupt.h> + +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/mmu.h> +#include <asm/mmu_context.h> +#include <asm/system.h> +#include <linux/uaccess.h> +#include <asm/exceptions.h> + +#if defined(CONFIG_KGDB) +int debugger_kernel_faults = 1; +#endif + +static unsigned long pte_misses; /* updated by do_page_fault() */ +static unsigned long pte_errors; /* updated by do_page_fault() */ + +/* + * Check whether the instruction at regs->pc is a store using + * an update addressing form which will update r1. + */ +static int store_updates_sp(struct pt_regs *regs) +{ + unsigned int inst; + + if (get_user(inst, (unsigned int *)regs->pc)) + return 0; + /* check for 1 in the rD field */ + if (((inst >> 21) & 0x1f) != 1) + return 0; + /* check for store opcodes */ + if ((inst & 0xd0000000) == 0xd0000000) + return 1; + return 0; +} + + +/* + * bad_page_fault is called when we have a bad access from the kernel. + * It is called from do_page_fault above and from some of the procedures + * in traps.c. + */ +static void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) +{ + const struct exception_table_entry *fixup; +/* MS: no context */ + /* Are we prepared to handle this fault? */ + fixup = search_exception_tables(regs->pc); + if (fixup) { + regs->pc = fixup->fixup; + return; + } + + /* kernel has accessed a bad area */ +#if defined(CONFIG_KGDB) + if (debugger_kernel_faults) + debugger(regs); +#endif + die("kernel access of bad area", regs, sig); +} + +/* + * The error_code parameter is ESR for a data fault, + * 0 for an instruction fault. + */ +void do_page_fault(struct pt_regs *regs, unsigned long address, + unsigned long error_code) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + siginfo_t info; + int code = SEGV_MAPERR; + int is_write = error_code & ESR_S; + int fault; + + regs->ear = address; + regs->esr = error_code; + + /* On a kernel SLB miss we can only check for a valid exception entry */ + if (kernel_mode(regs) && (address >= TASK_SIZE)) { + printk(KERN_WARNING "kernel task_size exceed"); + _exception(SIGSEGV, regs, code, address); + } + + /* for instr TLB miss and instr storage exception ESR_S is undefined */ + if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11) + is_write = 0; + +#if defined(CONFIG_KGDB) + if (debugger_fault_handler && regs->trap == 0x300) { + debugger_fault_handler(regs); + return; + } +#endif /* CONFIG_KGDB */ + + if (in_atomic() || mm == NULL) { + /* FIXME */ + if (kernel_mode(regs)) { + printk(KERN_EMERG + "Page fault in kernel mode - Oooou!!! pid %d\n", + current->pid); + _exception(SIGSEGV, regs, code, address); + return; + } + /* in_atomic() in user mode is really bad, + as is current->mm == NULL. */ + printk(KERN_EMERG "Page fault in user mode with " + "in_atomic(), mm = %p\n", mm); + printk(KERN_EMERG "r15 = %lx MSR = %lx\n", + regs->r15, regs->msr); + die("Weird page fault", regs, SIGSEGV); + } + + /* When running in the kernel we expect faults to occur only to + * addresses in user space. All other faults represent errors in the + * kernel and should generate an OOPS. Unfortunately, in the case of an + * erroneous fault occurring in a code path which already holds mmap_sem + * we will deadlock attempting to validate the fault against the + * address space. Luckily the kernel only validly references user + * space from well defined areas of code, which are listed in the + * exceptions table. + * + * As the vast majority of faults will be valid we will only perform + * the source reference check when there is a possibility of a deadlock. + * Attempt to lock the address space, if we cannot we then validate the + * source. If this is invalid we can skip the address space check, + * thus avoiding the deadlock. + */ + if (!down_read_trylock(&mm->mmap_sem)) { + if (kernel_mode(regs) && !search_exception_tables(regs->pc)) + goto bad_area_nosemaphore; + + down_read(&mm->mmap_sem); + } + + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + + if (vma->vm_start <= address) + goto good_area; + + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + + if (!is_write) + goto bad_area; + + /* + * N.B. The ABI allows programs to access up to + * a few hundred bytes below the stack pointer (TBD). + * The kernel signal delivery code writes up to about 1.5kB + * below the stack pointer (r1) before decrementing it. + * The exec code can write slightly over 640kB to the stack + * before setting the user r1. Thus we allow the stack to + * expand to 1MB without further checks. + */ + if (address + 0x100000 < vma->vm_end) { + + /* get user regs even if this fault is in kernel mode */ + struct pt_regs *uregs = current->thread.regs; + if (uregs == NULL) + goto bad_area; + + /* + * A user-mode access to an address a long way below + * the stack pointer is only valid if the instruction + * is one which would update the stack pointer to the + * address accessed if the instruction completed, + * i.e. either stwu rs,n(r1) or stwux rs,r1,rb + * (or the byte, halfword, float or double forms). + * + * If we don't check this then any write to the area + * between the last mapped region and the stack will + * expand the stack rather than segfaulting. + */ + if (address + 2048 < uregs->r1 + && (kernel_mode(regs) || !store_updates_sp(regs))) + goto bad_area; + } + if (expand_stack(vma, address)) + goto bad_area; + +good_area: + code = SEGV_ACCERR; + + /* a write */ + if (is_write) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + /* a read */ + } else { + /* protection fault */ + if (error_code & 0x08000000) + goto bad_area; + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; + } + + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ +survive: + fault = handle_mm_fault(mm, vma, address, is_write); + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + BUG(); + } + if (fault & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; + up_read(&mm->mmap_sem); + /* + * keep track of tlb+htab misses that are good addrs but + * just need pte's created via handle_mm_fault() + * -- Cort + */ + pte_misses++; + return; + +bad_area: + up_read(&mm->mmap_sem); + +bad_area_nosemaphore: + pte_errors++; + + /* User mode accesses cause a SIGSEGV */ + if (user_mode(regs)) { + _exception(SIGSEGV, regs, code, address); +/* info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_code = code; + info.si_addr = (void *) address; + force_sig_info(SIGSEGV, &info, current);*/ + return; + } + + bad_page_fault(regs, address, SIGSEGV); + return; + +/* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully. + */ +out_of_memory: + if (current->pid == 1) { + yield(); + down_read(&mm->mmap_sem); + goto survive; + } + up_read(&mm->mmap_sem); + printk(KERN_WARNING "VM: killing process %s\n", current->comm); + if (user_mode(regs)) + do_exit(SIGKILL); + bad_page_fault(regs, address, SIGKILL); + return; + +do_sigbus: + up_read(&mm->mmap_sem); + if (user_mode(regs)) { + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (void __user *)address; + force_sig_info(SIGBUS, &info, current); + return; + } + bad_page_fault(regs, address, SIGBUS); +} |