summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorpeter <peter@FreeBSD.org>2003-05-12 02:37:29 +0000
committerpeter <peter@FreeBSD.org>2003-05-12 02:37:29 +0000
commitc688fcc3ca312086f910aab7cdf045d92c0c364f (patch)
tree64ee040db2f5653c25d9bdeca46546a3cf5d954b
parent6d94cd1e18844aefcc39c20d259853511b278015 (diff)
downloadFreeBSD-src-c688fcc3ca312086f910aab7cdf045d92c0c364f.zip
FreeBSD-src-c688fcc3ca312086f910aab7cdf045d92c0c364f.tar.gz
Give a %fs and %gs to userland. Use swapgs to obtain the kernel %GS.base
value on entry and exit. This isn't as easy as it sounds because when we recursively trap or interrupt, we have to avoid duplicating the swapgs instruction or we end up back with the userland %gs. I implemented this by testing TF_CS to see if we're coming from supervisor mode already, and check for returning to supervisor. To avoid a race with interrupts in the brief period after beginning executing the handler and before the swapgs, convert all trap gates to interrupt gates, and reenable interrupts immediately after the swapgs. I am not happy with this. There are other possible ways to do this that should be investigated. (eg: storing the GS.base MSR value in the trapframe) Add some sysarch functions to let the userland code get to this. Approved by: re (blanket amd64/*)
-rw-r--r--sys/amd64/amd64/cpu_switch.S76
-rw-r--r--sys/amd64/amd64/exception.S111
-rw-r--r--sys/amd64/amd64/genassym.c5
-rw-r--r--sys/amd64/amd64/machdep.c52
-rw-r--r--sys/amd64/amd64/sys_machdep.c26
-rw-r--r--sys/amd64/include/pcb.h2
-rw-r--r--sys/amd64/include/sysarch.h20
-rw-r--r--sys/amd64/isa/icu_vector.S10
8 files changed, 211 insertions, 91 deletions
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
index 8cca838..d5976d4 100644
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -92,27 +92,39 @@ ENTRY(cpu_switch)
jz badsw2 /* no, panic */
#endif
- movq TD_PCB(%rdi),%rdx
+ movq TD_PCB(%rdi),%r8
movq (%rsp),%rax /* Hardware registers */
- movq %rax,PCB_RIP(%rdx)
- movq %rbx,PCB_RBX(%rdx)
- movq %rsp,PCB_RSP(%rdx)
- movq %rbp,PCB_RBP(%rdx)
- movq %r12,PCB_R12(%rdx)
- movq %r13,PCB_R13(%rdx)
- movq %r14,PCB_R14(%rdx)
- movq %r15,PCB_R15(%rdx)
+ movq %rax,PCB_RIP(%r8)
+ movq %rbx,PCB_RBX(%r8)
+ movq %rsp,PCB_RSP(%r8)
+ movq %rbp,PCB_RBP(%r8)
+ movq %r12,PCB_R12(%r8)
+ movq %r13,PCB_R13(%r8)
+ movq %r14,PCB_R14(%r8)
+ movq %r15,PCB_R15(%r8)
pushfq /* PSL */
- popq PCB_RFLAGS(%rdx)
+ popq PCB_RFLAGS(%r8)
+
+ /* Save userland %fs */
+ movl $MSR_FSBASE,%ecx
+ rdmsr
+ movl %eax,PCB_FSBASE(%r8)
+ movl %edx,PCB_FSBASE+4(%r8)
+
+ /* Save userland %gs */
+ movl $MSR_KGSBASE,%ecx
+ rdmsr
+ movl %eax,PCB_GSBASE(%r8)
+ movl %edx,PCB_GSBASE+4(%r8)
/* have we used fp, and need a save? */
cmpq %rdi,PCPU(FPCURTHREAD)
jne 1f
pushq %rdi
pushq %rsi
- addq $PCB_SAVEFPU,%rdx /* h/w bugs make saving complicated */
- movq %rdx, %rdi
+ addq $PCB_SAVEFPU,%r8 /* h/w bugs make saving complicated */
+ movq %r8, %rdi
call npxsave /* do it in a big C function */
popq %rsi
popq %rdi
@@ -123,12 +135,12 @@ ENTRY(cpu_switch)
testq %rsi,%rsi /* no thread? */
jz badsw3 /* no, panic */
#endif
- movq TD_PCB(%rsi),%rdx
+ movq TD_PCB(%rsi),%r8
xorq %rax, %rax
movl PCPU(CPUID), %eax
/* switch address space */
- movq PCB_CR3(%rdx),%rdx
+ movq PCB_CR3(%r8),%rdx
movq %rdx,%cr3 /* new address space */
/* Release bit from old pmap->pm_active */
@@ -146,26 +158,38 @@ sw1:
* At this point, we've switched address spaces and are ready
* to load up the rest of the next context.
*/
- movq TD_PCB(%rsi),%rdx
+ movq TD_PCB(%rsi),%r8
+
+ /* Restore userland %fs */
+ movl $MSR_FSBASE,%ecx
+ movl PCB_FSBASE(%r8),%eax
+ movl PCB_FSBASE+4(%r8),%edx
+ wrmsr
+
+ /* Restore userland %gs */
+ movl $MSR_KGSBASE,%ecx
+ movl PCB_GSBASE(%r8),%eax
+ movl PCB_GSBASE+4(%r8),%edx
+ wrmsr
/* Update the TSS_RSP0 pointer for the next interrupt */
- leaq -16(%rdx), %rbx
+ leaq -16(%r8), %rbx
movq %rbx, common_tss + COMMON_TSS_RSP0
/* Restore context. */
- movq PCB_RBX(%rdx),%rbx
- movq PCB_RSP(%rdx),%rsp
- movq PCB_RBP(%rdx),%rbp
- movq PCB_R12(%rdx),%r12
- movq PCB_R13(%rdx),%r13
- movq PCB_R14(%rdx),%r14
- movq PCB_R15(%rdx),%r15
- movq PCB_RIP(%rdx),%rax
+ movq PCB_RBX(%r8),%rbx
+ movq PCB_RSP(%r8),%rsp
+ movq PCB_RBP(%r8),%rbp
+ movq PCB_R12(%r8),%r12
+ movq PCB_R13(%r8),%r13
+ movq PCB_R14(%r8),%r14
+ movq PCB_R15(%r8),%r15
+ movq PCB_RIP(%r8),%rax
movq %rax,(%rsp)
- pushq PCB_RFLAGS(%rdx)
+ pushq PCB_RFLAGS(%r8)
popfq
- movq %rdx, PCPU(CURPCB)
+ movq %r8, PCPU(CURPCB)
movq %rsi, PCPU(CURTHREAD) /* into next thread */
ret
diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S
index 9b20068..e36bcb1 100644
--- a/sys/amd64/amd64/exception.S
+++ b/sys/amd64/amd64/exception.S
@@ -51,16 +51,16 @@
/*
* Trap and fault vector routines.
*
- * Most traps are 'trap gates', SDT_SYS386TGT. A trap gate pushes state on
- * the stack that mostly looks like an interrupt, but does not disable
- * interrupts. A few of the traps we are use are interrupt gates,
- * SDT_SYS386IGT, which are nearly the same thing except interrupts are
- * disabled on entry.
+ * All traps are 'interrupt gates', SDT_SYSIGT. An interrupt gate pushes
+ * state on the stack but also disables interrupts. This is important for
+ * us for the use of the swapgs instruction. We cannot be interrupted
+ * until the GS.base value is correct. For most traps, we automatically
+ * then enable interrupts if the interrupted context had them enabled.
+ * This is equivalent to the i386 port's use of SDT_SYS386TGT.
*
* The cpu will push a certain amount of state onto the kernel stack for
- * the current process. The amount of state depends on the type of trap
- * and whether the trap crossed rings or not. See i386/include/frame.h.
- * At the very least the current EFLAGS (status register, which includes
+ * the current process. See amd64/include/frame.h.
+ * This includes the current RFLAGS (status register, which includes
* the interrupt disable state prior to the trap), the code segment register,
* and the return instruction pointer are pushed by the cpu. The cpu
* will also push an 'error' code for certain traps. We push a dummy
@@ -75,6 +75,7 @@
#define IDTVEC(name) ALIGN_TEXT; .globl __CONCAT(X,name); \
.type __CONCAT(X,name),@function; __CONCAT(X,name):
#define TRAP(a) pushq $(a) ; jmp alltraps
+#define TRAP_NOEN(a) pushq $(a) ; jmp alltraps_noen
MCOUNT_LABEL(user)
MCOUNT_LABEL(btrap)
@@ -82,11 +83,11 @@ MCOUNT_LABEL(btrap)
IDTVEC(div)
pushq $0; TRAP(T_DIVIDE)
IDTVEC(dbg)
- pushq $0; TRAP(T_TRCTRAP)
+ pushq $0; TRAP_NOEN(T_TRCTRAP)
IDTVEC(nmi)
pushq $0; TRAP(T_NMI)
IDTVEC(bpt)
- pushq $0; TRAP(T_BPTFLT)
+ pushq $0; TRAP_NOEN(T_BPTFLT)
IDTVEC(ofl)
pushq $0; TRAP(T_OFLOW)
IDTVEC(bnd)
@@ -106,7 +107,7 @@ IDTVEC(stk)
IDTVEC(prot)
TRAP(T_PROTFLT)
IDTVEC(page)
- TRAP(T_PAGEFLT)
+ TRAP_NOEN(T_PAGEFLT)
IDTVEC(mchk)
pushq $0; TRAP(T_MCHK)
IDTVEC(rsvd)
@@ -119,10 +120,9 @@ IDTVEC(xmm)
pushq $0; TRAP(T_XMMFLT)
/*
- * alltraps entry point. Interrupts are enabled if this was a trap
- * gate (TGT), else disabled if this was an interrupt gate (IGT).
- * Note that int0x80_syscall is a trap gate. Only page faults
- * use an interrupt gate.
+ * alltraps entry point. Use swapgs if this is the first time in the
+ * kernel from userland. Reenable interrupts if they were enabled
+ * before the trap. This approximates SDT_SYS386TGT on the i386 port.
*/
SUPERALIGN_TEXT
@@ -130,6 +130,14 @@ IDTVEC(xmm)
.type alltraps,@function
alltraps:
subq $TF_TRAPNO,%rsp /* tf_err and tf_trapno already pushed */
+ testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
+ jz alltraps_testi /* already running with kernel GS.base */
+ swapgs
+alltraps_testi:
+ testl $PSL_I,TF_RFLAGS(%rsp)
+ jz alltraps_pushregs
+ sti
+alltraps_pushregs:
movq %rdi,TF_RDI(%rsp)
movq %rsi,TF_RSI(%rsp)
movq %rdx,TF_RDX(%rsp)
@@ -153,22 +161,43 @@ calltrap:
MEXITCOUNT
jmp doreti /* Handle any pending ASTs */
+ /*
+ * alltraps_noen entry point. Unlike alltraps above, we want to
+ * leave the interrupts disabled. This corresponds to
+ * SDT_SYS386IGT on the i386 port.
+ */
+ SUPERALIGN_TEXT
+ .globl alltraps_noen
+ .type alltraps_noen,@function
+alltraps_noen:
+ subq $TF_TRAPNO,%rsp /* tf_err and tf_trapno already pushed */
+ testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
+ jz alltraps_pushregs /* already running with kernel GS.base */
+ swapgs
+ jmp alltraps_pushregs
+
+IDTVEC(dblfault)
+ pushq $T_DOUBLEFLT
+ subq $TF_TRAPNO,%rsp /* tf_err and tf_trapno already pushed */
+ testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
+ jz 1f /* already running with kernel GS.base */
+ swapgs
+1: call dblfault_handler
+2: hlt
+ jmp 2b
+
/*
* Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80)
*
- * Even though the name says 'int0x80', this is actually a TGT (trap gate)
- * rather then an IGT (interrupt gate). Thus interrupts are enabled on
- * entry just as they are for a normal syscall.
- *
- * This leaves a place to put eflags so that the call frame can be
- * converted to a trap frame. Note that the eflags is (semi-)bogusly
- * pushed into (what will be) tf_err and then copied later into the
- * final spot. It has to be done this way because esp can't be just
- * temporarily altered for the pushfl - an interrupt might come in
- * and clobber the saved cs/eip.
+ * This is a SDT_SYSIDT entry point (unlike the i386 port) so that we
+ * can do a swapgs before enabling interrupts. This is critical because
+ * if we took an interrupt before swapgs, the interrupt code would see
+ * that it originated in supervisor mode and skip the swapgs.
*/
SUPERALIGN_TEXT
IDTVEC(int0x80_syscall)
+ swapgs
+ sti
pushq $2 /* sizeof "int 0x80" */
subq $TF_ERR,%rsp /* skip over tf_trapno */
movq %rdi,TF_RDI(%rsp)
@@ -196,19 +225,21 @@ IDTVEC(int0x80_syscall)
* and the new privilege level. We are still running on the old user stack
* pointer. We have to juggle a few things around to find our stack etc.
* swapgs gives us access to our PCPU space only.
- * XXX The PCPU stuff is stubbed out right now...
*/
IDTVEC(fast_syscall)
- /* XXX swapgs */
+ swapgs
movq %rsp,PCPU(SCRATCH_RSP)
movq common_tss+COMMON_TSS_RSP0,%rsp
/* Now emulate a trapframe. Ugh. */
subq $TF_SIZE,%rsp
- movq $KUDSEL,TF_SS(%rsp)
/* defer TF_RSP till we have a spare register */
movq %r11,TF_RFLAGS(%rsp)
- movq $KUCSEL,TF_CS(%rsp)
movq %rcx,TF_RIP(%rsp) /* %rcx original value is in %r10 */
+ movq PCPU(SCRATCH_RSP),%r11 /* %r11 already saved */
+ movq %r11,TF_RSP(%rsp) /* user stack pointer */
+ sti
+ movq $KUDSEL,TF_SS(%rsp)
+ movq $KUCSEL,TF_CS(%rsp)
movq $2,TF_ERR(%rsp)
movq %rdi,TF_RDI(%rsp) /* arg 1 */
movq %rsi,TF_RSI(%rsp) /* arg 2 */
@@ -223,14 +254,10 @@ IDTVEC(fast_syscall)
movq %r13,TF_R13(%rsp) /* C preserved */
movq %r14,TF_R14(%rsp) /* C preserved */
movq %r15,TF_R15(%rsp) /* C preserved */
- movq PCPU(SCRATCH_RSP),%r12 /* %r12 already saved */
- movq %r12,TF_RSP(%rsp) /* user stack pointer */
- sti
call syscall
movq PCPU(CURPCB),%rax
testq $PCB_FULLCTX,PCB_FLAGS(%rax)
jne 3f
- /* simplified from doreti */
1: /* Check for and handle AST's on return to userland */
cli
movq PCPU(CURTHREAD),%rax
@@ -255,7 +282,7 @@ IDTVEC(fast_syscall)
movq TF_RIP(%rsp),%rcx /* original %rip */
movq TF_RSP(%rsp),%r9 /* user stack pointer */
movq %r9,%rsp /* original %rsp */
- /* XXX swapgs */
+ swapgs
sysretq
3: /* Requested full context restore, use doreti for that */
andq $~PCB_FULLCTX,PCB_FLAGS(%rax)
@@ -344,12 +371,16 @@ doreti_exit:
movq TF_R13(%rsp),%r13
movq TF_R14(%rsp),%r14
movq TF_R15(%rsp),%r15
- addq $TF_RIP,%rsp /* skip over tf_err, tf_trapno */
+ testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
+ jz 1f /* keep running with kernel GS.base */
+ cli
+ swapgs
+1: addq $TF_RIP,%rsp /* skip over tf_err, tf_trapno */
.globl doreti_iret
doreti_iret:
iretq
- /*
+ /*
* doreti_iret_fault and friends. Alternative return code for
* the case where we get a fault in the doreti_exit code
* above. trap() (i386/i386/trap.c) catches this specific
@@ -360,7 +391,13 @@ doreti_iret:
.globl doreti_iret_fault
doreti_iret_fault:
subq $TF_RIP,%rsp /* space including tf_err, tf_trapno */
- movq %rdi,TF_RDI(%rsp)
+ testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
+ jz 1f /* already running with kernel GS.base */
+ swapgs
+1: testl $PSL_I,TF_RFLAGS(%rsp)
+ jz 2f
+ sti
+2: movq %rdi,TF_RDI(%rsp)
movq %rsi,TF_RSI(%rsp)
movq %rdx,TF_RDX(%rsp)
movq %rcx,TF_RCX(%rsp)
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index cab9eab..f828e4d 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -70,6 +70,7 @@
#include <machine/cpu.h>
#include <machine/sigframe.h>
#include <machine/proc.h>
+#include <machine/specialreg.h>
ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
@@ -122,6 +123,8 @@ ASSYM(PCB_RSP, offsetof(struct pcb, pcb_rsp));
ASSYM(PCB_RBX, offsetof(struct pcb, pcb_rbx));
ASSYM(PCB_RIP, offsetof(struct pcb, pcb_rip));
ASSYM(PCB_RFLAGS, offsetof(struct pcb, pcb_rflags));
+ASSYM(PCB_FSBASE, offsetof(struct pcb, pcb_fsbase));
+ASSYM(PCB_GSBASE, offsetof(struct pcb, pcb_gsbase));
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
ASSYM(PCB_FULLCTX, PCB_FULLCTX);
@@ -178,6 +181,8 @@ ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL));
ASSYM(KUCSEL, GSEL(GUCODE_SEL, SEL_UPL));
ASSYM(KUDSEL, GSEL(GUDATA_SEL, SEL_UPL));
+ASSYM(MSR_FSBASE, MSR_FSBASE);
+ASSYM(MSR_KGSBASE, MSR_KGSBASE);
ASSYM(GPROC0_SEL, GPROC0_SEL);
ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock));
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 7b2d462..d3890de0 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -465,6 +465,12 @@ exec_setregs(td, entry, stack, ps_strings)
u_long ps_strings;
{
struct trapframe *regs = td->td_frame;
+ struct pcb *pcb = td->td_pcb;
+
+ pcb->pcb_fsbase = 0;
+ pcb->pcb_gsbase = 0;
+ wrmsr(MSR_FSBASE, 0);
+ wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
bzero((char *)regs, sizeof(struct trapframe));
regs->tf_rip = entry;
@@ -654,7 +660,7 @@ extern inthand_t
IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
- IDTVEC(xmm), IDTVEC(int0x80_syscall),
+ IDTVEC(xmm), IDTVEC(dblfault), IDTVEC(int0x80_syscall),
IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
void
@@ -1182,9 +1188,9 @@ hammer_time(void)
lgdt(&r_gdt);
pc = &__pcpu;
- wrmsr(MSR_FSBASE, (u_int64_t)pc);
+ wrmsr(MSR_FSBASE, 0); /* User value */
wrmsr(MSR_GSBASE, (u_int64_t)pc);
- wrmsr(MSR_KGSBASE, (u_int64_t)pc);
+ wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
pcpu_init(pc, 0, sizeof(struct pcpu));
PCPU_SET(prvspace, pc);
@@ -1204,28 +1210,28 @@ hammer_time(void)
/* exceptions */
for (x = 0; x < NIDT; x++)
- setidt(x, &IDTVEC(rsvd), SDT_SYSTGT, SEL_KPL, 0);
- setidt(0, &IDTVEC(div), SDT_SYSTGT, SEL_KPL, 0);
+ setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(0, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0);
setidt(1, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0);
- setidt(2, &IDTVEC(nmi), SDT_SYSTGT, SEL_KPL, 0);
+ setidt(2, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 0);
setidt(3, &IDTVEC(bpt), SDT_SYSIGT, SEL_KPL, 0);
- setidt(4, &IDTVEC(ofl), SDT_SYSTGT, SEL_KPL, 0);
- setidt(5, &IDTVEC(bnd), SDT_SYSTGT, SEL_KPL, 0);
- setidt(6, &IDTVEC(ill), SDT_SYSTGT, SEL_KPL, 0);
- setidt(7, &IDTVEC(dna), SDT_SYSTGT, SEL_KPL, 0);
- setidt(8, (inthand_t *)dblfault_handler, SDT_SYSIGT, SEL_KPL, 1);
- setidt(9, &IDTVEC(fpusegm), SDT_SYSTGT, SEL_KPL, 0);
- setidt(10, &IDTVEC(tss), SDT_SYSTGT, SEL_KPL, 0);
- setidt(11, &IDTVEC(missing), SDT_SYSTGT, SEL_KPL, 0);
- setidt(12, &IDTVEC(stk), SDT_SYSTGT, SEL_KPL, 0);
- setidt(13, &IDTVEC(prot), SDT_SYSTGT, SEL_KPL, 0);
+ setidt(4, &IDTVEC(ofl), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(5, &IDTVEC(bnd), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(6, &IDTVEC(ill), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(7, &IDTVEC(dna), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(8, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1);
+ setidt(9, &IDTVEC(fpusegm), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(10, &IDTVEC(tss), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(11, &IDTVEC(missing), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(12, &IDTVEC(stk), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(13, &IDTVEC(prot), SDT_SYSIGT, SEL_KPL, 0);
setidt(14, &IDTVEC(page), SDT_SYSIGT, SEL_KPL, 0);
- setidt(15, &IDTVEC(rsvd), SDT_SYSTGT, SEL_KPL, 0);
- setidt(16, &IDTVEC(fpu), SDT_SYSTGT, SEL_KPL, 0);
- setidt(17, &IDTVEC(align), SDT_SYSTGT, SEL_KPL, 0);
- setidt(18, &IDTVEC(mchk), SDT_SYSTGT, SEL_KPL, 0);
- setidt(19, &IDTVEC(xmm), SDT_SYSTGT, SEL_KPL, 0);
- setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYSTGT, SEL_UPL, 0);
+ setidt(15, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(16, &IDTVEC(fpu), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(17, &IDTVEC(align), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(18, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(19, &IDTVEC(xmm), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYSIGT, SEL_UPL, 0);
r_idt.rd_limit = sizeof(idt0) - 1;
r_idt.rd_base = (long) idt;
@@ -1251,8 +1257,6 @@ hammer_time(void)
/* make an initial tss so cpu can get interrupt stack on syscall! */
common_tss.tss_rsp0 = thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb);
- /* XXX we need to update tss_rsp0 in cpu_switch */
- /* XXX maybe not yet, everything is still running in supervisor mode */
/* doublefault stack space, runs on ist1 */
common_tss.tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)];
diff --git a/sys/amd64/amd64/sys_machdep.c b/sys/amd64/amd64/sys_machdep.c
index c815a59..a9ed7a1 100644
--- a/sys/amd64/amd64/sys_machdep.c
+++ b/sys/amd64/amd64/sys_machdep.c
@@ -40,6 +40,9 @@
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/sysproto.h>
+#include <machine/specialreg.h>
+#include <machine/sysarch.h>
+#include <machine/pcb.h>
#ifndef _SYS_SYSPROTO_H_
struct sysarch_args {
@@ -53,9 +56,30 @@ sysarch(td, uap)
struct thread *td;
register struct sysarch_args *uap;
{
- int error;
+ int error = 0;
+ struct pcb *pcb = curthread->td_pcb;
switch(uap->op) {
+ case AMD64_GET_FSBASE:
+ error = copyout(&pcb->pcb_fsbase, uap->parms, sizeof(pcb->pcb_fsbase));
+ break;
+
+ case AMD64_SET_FSBASE:
+ error = copyin(uap->parms, &pcb->pcb_fsbase, sizeof(pcb->pcb_fsbase));
+ if (!error)
+ wrmsr(MSR_FSBASE, pcb->pcb_fsbase);
+ break;
+
+ case AMD64_GET_GSBASE:
+ error = copyout(&pcb->pcb_gsbase, uap->parms, sizeof(pcb->pcb_gsbase));
+ break;
+
+ case AMD64_SET_GSBASE:
+ error = copyin(uap->parms, &pcb->pcb_gsbase, sizeof(pcb->pcb_gsbase));
+ if (!error)
+ wrmsr(MSR_KGSBASE, pcb->pcb_gsbase);
+ break;
+
default:
error = EINVAL;
break;
diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h
index 551bf8f..c17709b 100644
--- a/sys/amd64/include/pcb.h
+++ b/sys/amd64/include/pcb.h
@@ -57,6 +57,8 @@ struct pcb {
register_t pcb_rbx;
register_t pcb_rip;
register_t pcb_rflags;
+ register_t pcb_fsbase;
+ register_t pcb_gsbase;
struct savefpu pcb_save;
u_long pcb_flags;
diff --git a/sys/amd64/include/sysarch.h b/sys/amd64/include/sysarch.h
index c33f7b8..f14ee31 100644
--- a/sys/amd64/include/sysarch.h
+++ b/sys/amd64/include/sysarch.h
@@ -34,9 +34,27 @@
*/
/*
- * Architecture specific syscalls (i386)
+ * Architecture specific syscalls (AMD64)
*/
#ifndef _MACHINE_SYSARCH_H_
#define _MACHINE_SYSARCH_H_
+#define AMD64_GET_FSBASE 0
+#define AMD64_SET_FSBASE 1
+#define AMD64_GET_GSBASE 2
+#define AMD64_SET_GSBASE 3
+
+#if 0 /* these wrappers need to be implemented in libc first */
+#ifndef _KERNEL
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+unsigned long amd64_get_fsbase(void);
+unsigned long amd64_set_fsbase(unsigned long);
+unsigned long amd64_get_gsbase(void);
+unsigned long amd64_set_gsbase(unsigned long);
+__END_DECLS
+#endif
+#endif
+
#endif /* !_MACHINE_SYSARCH_H_ */
diff --git a/sys/amd64/isa/icu_vector.S b/sys/amd64/isa/icu_vector.S
index c9fea1a..d778f3f 100644
--- a/sys/amd64/isa/icu_vector.S
+++ b/sys/amd64/isa/icu_vector.S
@@ -25,7 +25,10 @@
SUPERALIGN_TEXT ; \
IDTVEC(vec_name) ; \
subq $TF_RIP,%rsp ; /* skip dummy tf_err and tf_trapno */ \
- movq %rdi,TF_RDI(%rsp) ; \
+ testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? */ \
+ jz 1f ; /* Yes, don't swapgs again */ \
+ swapgs ; \
+1: movq %rdi,TF_RDI(%rsp) ; \
movq %rsi,TF_RSI(%rsp) ; \
movq %rdx,TF_RDX(%rsp) ; \
movq %rcx,TF_RCX(%rsp) ; \
@@ -69,7 +72,10 @@ IDTVEC(vec_name) ; \
SUPERALIGN_TEXT ; \
IDTVEC(vec_name) ; \
subq $TF_RIP,%rsp ; /* skip dummy tf_err and tf_trapno */ \
- movq %rdi,TF_RDI(%rsp) ; \
+ testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? */ \
+ jz 1f ; /* Yes, don't swapgs again */ \
+ swapgs ; \
+1: movq %rdi,TF_RDI(%rsp) ; \
movq %rsi,TF_RSI(%rsp) ; \
movq %rdx,TF_RDX(%rsp) ; \
movq %rcx,TF_RCX(%rsp) ; \
OpenPOWER on IntegriCloud