summaryrefslogtreecommitdiff
path: root/arch/i386/kernel/entry.S
diff options
context:
space:
mode:
authorJeremy Fitzhardinge <jeremy@goop.org>2006-12-07 02:14:02 +0100
committerAndi Kleen <andi@basil.nowhere.org>2006-12-07 02:14:02 +0100
commitf95d47caae5302a63d92be9a0292abc90e2a14e1 (patch)
treecfa963975d104c56aba28df6c941759175ed4b98 /arch/i386/kernel/entry.S
parent62111195800d80c66cdc69063ea3145878c99fbf (diff)
downloadlwn-f95d47caae5302a63d92be9a0292abc90e2a14e1.tar.gz
lwn-f95d47caae5302a63d92be9a0292abc90e2a14e1.zip
[PATCH] i386: Use %gs as the PDA base-segment in the kernel
This patch is the meat of the PDA change. This patch makes several related changes: 1: Most significantly, %gs is now used in the kernel. This means that on entry, the old value of %gs is saved away, and it is reloaded with __KERNEL_PDA. 2: entry.S constructs the stack in the shape of struct pt_regs, and this is passed around the kernel so that the process's saved register state can be accessed. Unfortunately struct pt_regs doesn't currently have space for %gs (or %fs). This patch extends pt_regs to add space for gs (no space is allocated for %fs, since it won't be used, and it would just complicate the code in entry.S to work around the space). 3: Because %gs is now saved on the stack like %ds, %es and the integer registers, there are a number of places where it no longer needs to be handled specially; namely context switch, and saving/restoring the register state in a signal context. 4: And since kernel threads run in kernel space and call normal kernel code, they need to be created with their %gs == __KERNEL_PDA. Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com> Signed-off-by: Andi Kleen <ak@suse.de> Cc: Chuck Ebbert <76306.1226@compuserve.com> Cc: Zachary Amsden <zach@vmware.com> Cc: Jan Beulich <jbeulich@novell.com> Cc: Andi Kleen <ak@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org>
Diffstat (limited to 'arch/i386/kernel/entry.S')
-rw-r--r--arch/i386/kernel/entry.S70
1 files changed, 48 insertions, 22 deletions
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 0069bf01603e..b99d4a160078 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -30,12 +30,13 @@
* 18(%esp) - %eax
* 1C(%esp) - %ds
* 20(%esp) - %es
- * 24(%esp) - orig_eax
- * 28(%esp) - %eip
- * 2C(%esp) - %cs
- * 30(%esp) - %eflags
- * 34(%esp) - %oldesp
- * 38(%esp) - %oldss
+ * 24(%esp) - %gs
+ * 28(%esp) - orig_eax
+ * 2C(%esp) - %eip
+ * 30(%esp) - %cs
+ * 34(%esp) - %eflags
+ * 38(%esp) - %oldesp
+ * 3C(%esp) - %oldss
*
* "current" is in register %ebx during any slow entries.
*/
@@ -92,6 +93,9 @@ VM_MASK = 0x00020000
#define SAVE_ALL \
cld; \
+ pushl %gs; \
+ CFI_ADJUST_CFA_OFFSET 4;\
+ /*CFI_REL_OFFSET gs, 0;*/\
pushl %es; \
CFI_ADJUST_CFA_OFFSET 4;\
/*CFI_REL_OFFSET es, 0;*/\
@@ -121,7 +125,9 @@ VM_MASK = 0x00020000
CFI_REL_OFFSET ebx, 0;\
movl $(__USER_DS), %edx; \
movl %edx, %ds; \
- movl %edx, %es;
+ movl %edx, %es; \
+ movl $(__KERNEL_PDA), %edx; \
+ movl %edx, %gs
#define RESTORE_INT_REGS \
popl %ebx; \
@@ -154,17 +160,22 @@ VM_MASK = 0x00020000
2: popl %es; \
CFI_ADJUST_CFA_OFFSET -4;\
/*CFI_RESTORE es;*/\
-.section .fixup,"ax"; \
-3: movl $0,(%esp); \
- jmp 1b; \
+3: popl %gs; \
+ CFI_ADJUST_CFA_OFFSET -4;\
+ /*CFI_RESTORE gs;*/\
+.pushsection .fixup,"ax"; \
4: movl $0,(%esp); \
+ jmp 1b; \
+5: movl $0,(%esp); \
jmp 2b; \
-.previous; \
+6: movl $0,(%esp); \
+ jmp 3b; \
.section __ex_table,"a";\
.align 4; \
- .long 1b,3b; \
- .long 2b,4b; \
-.previous
+ .long 1b,4b; \
+ .long 2b,5b; \
+ .long 3b,6b; \
+.popsection
#define RING0_INT_FRAME \
CFI_STARTPROC simple;\
@@ -231,6 +242,7 @@ check_userspace:
andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
cmpl $USER_RPL, %eax
jb resume_kernel # not returning to v8086 or userspace
+
ENTRY(resume_userspace)
DISABLE_INTERRUPTS # make sure we don't miss an interrupt
# setting need_resched or sigpending
@@ -327,9 +339,16 @@ sysenter_past_esp:
movl PT_OLDESP(%esp), %ecx
xorl %ebp,%ebp
TRACE_IRQS_ON
+1: mov PT_GS(%esp), %gs
ENABLE_INTERRUPTS_SYSEXIT
CFI_ENDPROC
-
+.pushsection .fixup,"ax"
+2: movl $0,PT_GS(%esp)
+ jmp 1b
+.section __ex_table,"a"
+ .align 4
+ .long 1b,2b
+.popsection
# system call handler stub
ENTRY(system_call)
@@ -375,7 +394,7 @@ restore_nocheck:
TRACE_IRQS_IRET
restore_nocheck_notrace:
RESTORE_REGS
- addl $4, %esp
+ addl $4, %esp # skip orig_eax/error_code
CFI_ADJUST_CFA_OFFSET -4
1: INTERRUPT_RETURN
.section .fixup,"ax"
@@ -588,6 +607,10 @@ KPROBE_ENTRY(page_fault)
CFI_ADJUST_CFA_OFFSET 4
ALIGN
error_code:
+ /* the function address is in %gs's slot on the stack */
+ pushl %es
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET es, 0*/
pushl %ds
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET ds, 0*/
@@ -613,18 +636,20 @@ error_code:
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET ebx, 0
cld
- pushl %es
+ pushl %gs
CFI_ADJUST_CFA_OFFSET 4
- /*CFI_REL_OFFSET es, 0*/
+ /*CFI_REL_OFFSET gs, 0*/
+ movl $(__KERNEL_PDA), %ecx
+ movl %ecx, %gs
UNWIND_ESPFIX_STACK
popl %ecx
CFI_ADJUST_CFA_OFFSET -4
/*CFI_REGISTER es, ecx*/
- movl PT_ES(%esp), %edi # get the function address
+ movl PT_GS(%esp), %edi # get the function address
movl PT_ORIG_EAX(%esp), %edx # get the error code
- movl $-1, PT_ORIG_EAX(%esp)
- movl %ecx, PT_ES(%esp)
- /*CFI_REL_OFFSET es, ES*/
+ movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
+ mov %ecx, PT_GS(%esp)
+ /*CFI_REL_OFFSET gs, ES*/
movl $(__USER_DS), %ecx
movl %ecx, %ds
movl %ecx, %es
@@ -936,6 +961,7 @@ ENTRY(arch_unwind_init_running)
movl %ebx, PT_EAX(%edx)
movl $__USER_DS, PT_DS(%edx)
movl $__USER_DS, PT_ES(%edx)
+ movl $0, PT_GS(%edx)
movl %ebx, PT_ORIG_EAX(%edx)
movl %ecx, PT_EIP(%edx)
movl 12(%esp), %ecx