summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--sys/amd64/amd64/cpu_switch.S76
-rw-r--r--sys/amd64/amd64/exception.S111
-rw-r--r--sys/amd64/amd64/genassym.c5
-rw-r--r--sys/amd64/amd64/machdep.c52
-rw-r--r--sys/amd64/amd64/sys_machdep.c26
-rw-r--r--sys/amd64/include/pcb.h2
-rw-r--r--sys/amd64/include/sysarch.h20
-rw-r--r--sys/amd64/isa/icu_vector.S10
8 files changed, 211 insertions, 91 deletions
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
index 8cca838..d5976d4 100644
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -92,27 +92,39 @@ ENTRY(cpu_switch)
jz badsw2 /* no, panic */
#endif
- movq TD_PCB(%rdi),%rdx
+ movq TD_PCB(%rdi),%r8
movq (%rsp),%rax /* Hardware registers */
- movq %rax,PCB_RIP(%rdx)
- movq %rbx,PCB_RBX(%rdx)
- movq %rsp,PCB_RSP(%rdx)
- movq %rbp,PCB_RBP(%rdx)
- movq %r12,PCB_R12(%rdx)
- movq %r13,PCB_R13(%rdx)
- movq %r14,PCB_R14(%rdx)
- movq %r15,PCB_R15(%rdx)
+ movq %rax,PCB_RIP(%r8)
+ movq %rbx,PCB_RBX(%r8)
+ movq %rsp,PCB_RSP(%r8)
+ movq %rbp,PCB_RBP(%r8)
+ movq %r12,PCB_R12(%r8)
+ movq %r13,PCB_R13(%r8)
+ movq %r14,PCB_R14(%r8)
+ movq %r15,PCB_R15(%r8)
pushfq /* PSL */
- popq PCB_RFLAGS(%rdx)
+ popq PCB_RFLAGS(%r8)
+
+ /* Save userland %fs */
+ movl $MSR_FSBASE,%ecx
+ rdmsr
+ movl %eax,PCB_FSBASE(%r8)
+ movl %edx,PCB_FSBASE+4(%r8)
+
+ /* Save userland %gs */
+ movl $MSR_KGSBASE,%ecx
+ rdmsr
+ movl %eax,PCB_GSBASE(%r8)
+ movl %edx,PCB_GSBASE+4(%r8)
/* have we used fp, and need a save? */
cmpq %rdi,PCPU(FPCURTHREAD)
jne 1f
pushq %rdi
pushq %rsi
- addq $PCB_SAVEFPU,%rdx /* h/w bugs make saving complicated */
- movq %rdx, %rdi
+ addq $PCB_SAVEFPU,%r8 /* h/w bugs make saving complicated */
+ movq %r8, %rdi
call npxsave /* do it in a big C function */
popq %rsi
popq %rdi
@@ -123,12 +135,12 @@ ENTRY(cpu_switch)
testq %rsi,%rsi /* no thread? */
jz badsw3 /* no, panic */
#endif
- movq TD_PCB(%rsi),%rdx
+ movq TD_PCB(%rsi),%r8
xorq %rax, %rax
movl PCPU(CPUID), %eax
/* switch address space */
- movq PCB_CR3(%rdx),%rdx
+ movq PCB_CR3(%r8),%rdx
movq %rdx,%cr3 /* new address space */
/* Release bit from old pmap->pm_active */
@@ -146,26 +158,38 @@ sw1:
* At this point, we've switched address spaces and are ready
* to load up the rest of the next context.
*/
- movq TD_PCB(%rsi),%rdx
+ movq TD_PCB(%rsi),%r8
+
+ /* Restore userland %fs */
+ movl $MSR_FSBASE,%ecx
+ movl PCB_FSBASE(%r8),%eax
+ movl PCB_FSBASE+4(%r8),%edx
+ wrmsr
+
+ /* Restore userland %gs */
+ movl $MSR_KGSBASE,%ecx
+ movl PCB_GSBASE(%r8),%eax
+ movl PCB_GSBASE+4(%r8),%edx
+ wrmsr
/* Update the TSS_RSP0 pointer for the next interrupt */
- leaq -16(%rdx), %rbx
+ leaq -16(%r8), %rbx
movq %rbx, common_tss + COMMON_TSS_RSP0
/* Restore context. */
- movq PCB_RBX(%rdx),%rbx
- movq PCB_RSP(%rdx),%rsp
- movq PCB_RBP(%rdx),%rbp
- movq PCB_R12(%rdx),%r12
- movq PCB_R13(%rdx),%r13
- movq PCB_R14(%rdx),%r14
- movq PCB_R15(%rdx),%r15
- movq PCB_RIP(%rdx),%rax
+ movq PCB_RBX(%r8),%rbx
+ movq PCB_RSP(%r8),%rsp
+ movq PCB_RBP(%r8),%rbp
+ movq PCB_R12(%r8),%r12
+ movq PCB_R13(%r8),%r13
+ movq PCB_R14(%r8),%r14
+ movq PCB_R15(%r8),%r15
+ movq PCB_RIP(%r8),%rax
movq %rax,(%rsp)
- pushq PCB_RFLAGS(%rdx)
+ pushq PCB_RFLAGS(%r8)
popfq
- movq %rdx, PCPU(CURPCB)
+ movq %r8, PCPU(CURPCB)
movq %rsi, PCPU(CURTHREAD) /* into next thread */
ret
diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S
index 9b20068..e36bcb1 100644
--- a/sys/amd64/amd64/exception.S
+++ b/sys/amd64/amd64/exception.S
@@ -51,16 +51,16 @@
/*
* Trap and fault vector routines.
*
- * Most traps are 'trap gates', SDT_SYS386TGT. A trap gate pushes state on
- * the stack that mostly looks like an interrupt, but does not disable
- * interrupts. A few of the traps we are use are interrupt gates,
- * SDT_SYS386IGT, which are nearly the same thing except interrupts are
- * disabled on entry.
+ * All traps are 'interrupt gates', SDT_SYSIGT. An interrupt gate pushes
+ * state on the stack but also disables interrupts. This is important for
+ * us for the use of the swapgs instruction. We cannot be interrupted
+ * until the GS.base value is correct. For most traps, we automatically
+ * then enable interrupts if the interrupted context had them enabled.
+ * This is equivalent to the i386 port's use of SDT_SYS386TGT.
*
* The cpu will push a certain amount of state onto the kernel stack for
- * the current process. The amount of state depends on the type of trap
- * and whether the trap crossed rings or not. See i386/include/frame.h.
- * At the very least the current EFLAGS (status register, which includes
+ * the current process. See amd64/include/frame.h.
+ * This includes the current RFLAGS (status register, which includes
* the interrupt disable state prior to the trap), the code segment register,
* and the return instruction pointer are pushed by the cpu. The cpu
* will also push an 'error' code for certain traps. We push a dummy
@@ -75,6 +75,7 @@
#define IDTVEC(name) ALIGN_TEXT; .globl __CONCAT(X,name); \
.type __CONCAT(X,name),@function; __CONCAT(X,name):
#define TRAP(a) pushq $(a) ; jmp alltraps
+#define TRAP_NOEN(a) pushq $(a) ; jmp alltraps_noen
MCOUNT_LABEL(user)
MCOUNT_LABEL(btrap)
@@ -82,11 +83,11 @@ MCOUNT_LABEL(btrap)
IDTVEC(div)
pushq $0; TRAP(T_DIVIDE)
IDTVEC(dbg)
- pushq $0; TRAP(T_TRCTRAP)
+ pushq $0; TRAP_NOEN(T_TRCTRAP)
IDTVEC(nmi)
pushq $0; TRAP(T_NMI)
IDTVEC(bpt)
- pushq $0; TRAP(T_BPTFLT)
+ pushq $0; TRAP_NOEN(T_BPTFLT)
IDTVEC(ofl)
pushq $0; TRAP(T_OFLOW)
IDTVEC(bnd)
@@ -106,7 +107,7 @@ IDTVEC(stk)
IDTVEC(prot)
TRAP(T_PROTFLT)
IDTVEC(page)
- TRAP(T_PAGEFLT)
+ TRAP_NOEN(T_PAGEFLT)
IDTVEC(mchk)
pushq $0; TRAP(T_MCHK)
IDTVEC(rsvd)
@@ -119,10 +120,9 @@ IDTVEC(xmm)
pushq $0; TRAP(T_XMMFLT)
/*
- * alltraps entry point. Interrupts are enabled if this was a trap
- * gate (TGT), else disabled if this was an interrupt gate (IGT).
- * Note that int0x80_syscall is a trap gate. Only page faults
- * use an interrupt gate.
+ * alltraps entry point. Use swapgs if this is the first time in the
+ * kernel from userland. Reenable interrupts if they were enabled
+ * before the trap. This approximates SDT_SYS386TGT on the i386 port.
*/
SUPERALIGN_TEXT
@@ -130,6 +130,14 @@ IDTVEC(xmm)
.type alltraps,@function
alltraps:
subq $TF_TRAPNO,%rsp /* tf_err and tf_trapno already pushed */
+ testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
+ jz alltraps_testi /* already running with kernel GS.base */
+ swapgs
+alltraps_testi:
+ testl $PSL_I,TF_RFLAGS(%rsp)
+ jz alltraps_pushregs
+ sti
+alltraps_pushregs:
movq %rdi,TF_RDI(%rsp)
movq %rsi,TF_RSI(%rsp)
movq %rdx,TF_RDX(%rsp)
@@ -153,22 +161,43 @@ calltrap:
MEXITCOUNT
jmp doreti /* Handle any pending ASTs */
+ /*
+ * alltraps_noen entry point. Unlike alltraps above, we want to
+ * leave the interrupts disabled. This corresponds to
+ * SDT_SYS386IGT on the i386 port.
+ */
+ SUPERALIGN_TEXT
+ .globl alltraps_noen
+ .type alltraps_noen,@function
+alltraps_noen:
+ subq $TF_TRAPNO,%rsp /* tf_err and tf_trapno already pushed */
+ testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
+ jz alltraps_pushregs /* already running with kernel GS.base */
+ swapgs
+ jmp alltraps_pushregs
+
+IDTVEC(dblfault)
+ pushq $T_DOUBLEFLT
+ subq $TF_TRAPNO,%rsp /* tf_err and tf_trapno already pushed */
+ testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
+ jz 1f /* already running with kernel GS.base */
+ swapgs
+1: call dblfault_handler
+2: hlt
+ jmp 2b
+
/*
* Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80)
*
- * Even though the name says 'int0x80', this is actually a TGT (trap gate)
- * rather then an IGT (interrupt gate). Thus interrupts are enabled on
- * entry just as they are for a normal syscall.
- *
- * This leaves a place to put eflags so that the call frame can be
- * converted to a trap frame. Note that the eflags is (semi-)bogusly
- * pushed into (what will be) tf_err and then copied later into the
- * final spot. It has to be done this way because esp can't be just
- * temporarily altered for the pushfl - an interrupt might come in
- * and clobber the saved cs/eip.
+ * This is a SDT_SYSIDT entry point (unlike the i386 port) so that we
+ * can do a swapgs before enabling interrupts. This is critical because
+ * if we took an interrupt before swapgs, the interrupt code would see
+ * that it originated in supervisor mode and skip the swapgs.
*/
SUPERALIGN_TEXT
IDTVEC(int0x80_syscall)
+ swapgs
+ sti
pushq $2 /* sizeof "int 0x80" */
subq $TF_ERR,%rsp /* skip over tf_trapno */
movq %rdi,TF_RDI(%rsp)
@@ -196,19 +225,21 @@ IDTVEC(int0x80_syscall)
* and the new privilege level. We are still running on the old user stack
* pointer. We have to juggle a few things around to find our stack etc.
* swapgs gives us access to our PCPU space only.
- * XXX The PCPU stuff is stubbed out right now...
*/
IDTVEC(fast_syscall)
- /* XXX swapgs */
+ swapgs
movq %rsp,PCPU(SCRATCH_RSP)
movq common_tss+COMMON_TSS_RSP0,%rsp
/* Now emulate a trapframe. Ugh. */
subq $TF_SIZE,%rsp
- movq $KUDSEL,TF_SS(%rsp)
/* defer TF_RSP till we have a spare register */
movq %r11,TF_RFLAGS(%rsp)
- movq $KUCSEL,TF_CS(%rsp)
movq %rcx,TF_RIP(%rsp) /* %rcx original value is in %r10 */
+ movq PCPU(SCRATCH_RSP),%r11 /* %r11 already saved */
+ movq %r11,TF_RSP(%rsp) /* user stack pointer */
+ sti
+ movq $KUDSEL,TF_SS(%rsp)
+ movq $KUCSEL,TF_CS(%rsp)
movq $2,TF_ERR(%rsp)
movq %rdi,TF_RDI(%rsp) /* arg 1 */
movq %rsi,TF_RSI(%rsp) /* arg 2 */
@@ -223,14 +254,10 @@ IDTVEC(fast_syscall)
movq %r13,TF_R13(%rsp) /* C preserved */
movq %r14,TF_R14(%rsp) /* C preserved */
movq %r15,TF_R15(%rsp) /* C preserved */
- movq PCPU(SCRATCH_RSP),%r12 /* %r12 already saved */
- movq %r12,TF_RSP(%rsp) /* user stack pointer */
- sti
call syscall
movq PCPU(CURPCB),%rax
testq $PCB_FULLCTX,PCB_FLAGS(%rax)
jne 3f
- /* simplified from doreti */
1: /* Check for and handle AST's on return to userland */
cli
movq PCPU(CURTHREAD),%rax
@@ -255,7 +282,7 @@ IDTVEC(fast_syscall)
movq TF_RIP(%rsp),%rcx /* original %rip */
movq TF_RSP(%rsp),%r9 /* user stack pointer */
movq %r9,%rsp /* original %rsp */
- /* XXX swapgs */
+ swapgs
sysretq
3: /* Requested full context restore, use doreti for that */
andq $~PCB_FULLCTX,PCB_FLAGS(%rax)
@@ -344,12 +371,16 @@ doreti_exit:
movq TF_R13(%rsp),%r13
movq TF_R14(%rsp),%r14
movq TF_R15(%rsp),%r15
- addq $TF_RIP,%rsp /* skip over tf_err, tf_trapno */
+ testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
+ jz 1f /* keep running with kernel GS.base */
+ cli
+ swapgs
+1: addq $TF_RIP,%rsp /* skip over tf_err, tf_trapno */
.globl doreti_iret
doreti_iret:
iretq
- /*
+ /*
* doreti_iret_fault and friends. Alternative return code for
* the case where we get a fault in the doreti_exit code
* above. trap() (i386/i386/trap.c) catches this specific
@@ -360,7 +391,13 @@ doreti_iret:
.globl doreti_iret_fault
doreti_iret_fault:
subq $TF_RIP,%rsp /* space including tf_err, tf_trapno */
- movq %rdi,TF_RDI(%rsp)
+ testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
+ jz 1f /* already running with kernel GS.base */
+ swapgs
+1: testl $PSL_I,TF_RFLAGS(%rsp)
+ jz 2f
+ sti
+2: movq %rdi,TF_RDI(%rsp)
movq %rsi,TF_RSI(%rsp)
movq %rdx,TF_RDX(%rsp)
movq %rcx,TF_RCX(%rsp)
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index cab9eab..f828e4d 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -70,6 +70,7 @@
#include <machine/cpu.h>
#include <machine/sigframe.h>
#include <machine/proc.h>
+#include <machine/specialreg.h>
ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
@@ -122,6 +123,8 @@ ASSYM(PCB_RSP, offsetof(struct pcb, pcb_rsp));
ASSYM(PCB_RBX, offsetof(struct pcb, pcb_rbx));
ASSYM(PCB_RIP, offsetof(struct pcb, pcb_rip));
ASSYM(PCB_RFLAGS, offsetof(struct pcb, pcb_rflags));
+ASSYM(PCB_FSBASE, offsetof(struct pcb, pcb_fsbase));
+ASSYM(PCB_GSBASE, offsetof(struct pcb, pcb_gsbase));
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
ASSYM(PCB_FULLCTX, PCB_FULLCTX);
@@ -178,6 +181,8 @@ ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL));
ASSYM(KUCSEL, GSEL(GUCODE_SEL, SEL_UPL));
ASSYM(KUDSEL, GSEL(GUDATA_SEL, SEL_UPL));
+ASSYM(MSR_FSBASE, MSR_FSBASE);
+ASSYM(MSR_KGSBASE, MSR_KGSBASE);
ASSYM(GPROC0_SEL, GPROC0_SEL);
ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock));
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 7b2d462..d3890de0 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -465,6 +465,12 @@ exec_setregs(td, entry, stack, ps_strings)
u_long ps_strings;
{
struct trapframe *regs = td->td_frame;
+ struct pcb *pcb = td->td_pcb;
+
+ pcb->pcb_fsbase = 0;
+ pcb->pcb_gsbase = 0;
+ wrmsr(MSR_FSBASE, 0);
+ wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
bzero((char *)regs, sizeof(struct trapframe));
regs->tf_rip = entry;
@@ -654,7 +660,7 @@ extern inthand_t
IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
- IDTVEC(xmm), IDTVEC(int0x80_syscall),
+ IDTVEC(xmm), IDTVEC(dblfault), IDTVEC(int0x80_syscall),
IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
void
@@ -1182,9 +1188,9 @@ hammer_time(void)
lgdt(&r_gdt);
pc = &__pcpu;
- wrmsr(MSR_FSBASE, (u_int64_t)pc);
+ wrmsr(MSR_FSBASE, 0); /* User value */
wrmsr(MSR_GSBASE, (u_int64_t)pc);
- wrmsr(MSR_KGSBASE, (u_int64_t)pc);
+ wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
pcpu_init(pc, 0, sizeof(struct pcpu));
PCPU_SET(prvspace, pc);
@@ -1204,28 +1210,28 @@ hammer_time(void)
/* exceptions */
for (x = 0; x < NIDT; x++)
- setidt(x, &IDTVEC(rsvd), SDT_SYSTGT, SEL_KPL, 0);
- setidt(0, &IDTVEC(div), SDT_SYSTGT, SEL_KPL, 0);
+ setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(0, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0);
setidt(1, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0);
- setidt(2, &IDTVEC(nmi), SDT_SYSTGT, SEL_KPL, 0);
+ setidt(2, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 0);
setidt(3, &IDTVEC(bpt), SDT_SYSIGT, SEL_KPL, 0);
- setidt(4, &IDTVEC(ofl), SDT_SYSTGT, SEL_KPL, 0);
- setidt(5, &IDTVEC(bnd), SDT_SYSTGT, SEL_KPL, 0);
- setidt(6, &IDTVEC(ill), SDT_SYSTGT, SEL_KPL, 0);
- setidt(7, &IDTVEC(dna), SDT_SYSTGT, SEL_KPL, 0);
- setidt(8, (inthand_t *)dblfault_handler, SDT_SYSIGT, SEL_KPL, 1);
- setidt(9, &IDTVEC(fpusegm), SDT_SYSTGT, SEL_KPL, 0);
- setidt(10, &IDTVEC(tss), SDT_SYSTGT, SEL_KPL, 0);
- setidt(11, &IDTVEC(missing), SDT_SYSTGT, SEL_KPL, 0);
- setidt(12, &IDTVEC(stk), SDT_SYSTGT, SEL_KPL, 0);
- setidt(13, &IDTVEC(prot), SDT_SYSTGT, SEL_KPL, 0);
+ setidt(4, &IDTVEC(ofl), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(5, &IDTVEC(bnd), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(6, &IDTVEC(ill), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(7, &IDTVEC(dna), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(8, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1);
+ setidt(9, &IDTVEC(fpusegm), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(10, &IDTVEC(tss), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(11, &IDTVEC(missing), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(12, &IDTVEC(stk), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(13, &IDTVEC(prot), SDT_SYSIGT, SEL_KPL, 0);
setidt(14, &IDTVEC(page), SDT_SYSIGT, SEL_KPL, 0);
- setidt(15, &IDTVEC(rsvd), SDT_SYSTGT, SEL_KPL, 0);
- setidt(16, &IDTVEC(fpu), SDT_SYSTGT, SEL_KPL, 0);
- setidt(17, &IDTVEC(align), SDT_SYSTGT, SEL_KPL, 0);
- setidt(18, &IDTVEC(mchk), SDT_SYSTGT, SEL_KPL, 0);
- setidt(19, &IDTVEC(xmm), SDT_SYSTGT, SEL_KPL, 0);
- setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYSTGT, SEL_UPL, 0);
+ setidt(15, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(16, &IDTVEC(fpu), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(17, &IDTVEC(align), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(18, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(19, &IDTVEC(xmm), SDT_SYSIGT, SEL_KPL, 0);
+ setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYSIGT, SEL_UPL, 0);
r_idt.rd_limit = sizeof(idt0) - 1;
r_idt.rd_base = (long) idt;
@@ -1251,8 +1257,6 @@ hammer_time(void)
/* make an initial tss so cpu can get interrupt stack on syscall! */
common_tss.tss_rsp0 = thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb);
- /* XXX we need to update tss_rsp0 in cpu_switch */
- /* XXX maybe not yet, everything is still running in supervisor mode */
/* doublefault stack space, runs on ist1 */
common_tss.tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)];
diff --git a/sys/amd64/amd64/sys_machdep.c b/sys/amd64/amd64/sys_machdep.c
index c815a59..a9ed7a1 100644
--- a/sys/amd64/amd64/sys_machdep.c
+++ b/sys/amd64/amd64/sys_machdep.c
@@ -40,6 +40,9 @@
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/sysproto.h>
+#include <machine/specialreg.h>
+#include <machine/sysarch.h>
+#include <machine/pcb.h>
#ifndef _SYS_SYSPROTO_H_
struct sysarch_args {
@@ -53,9 +56,30 @@ sysarch(td, uap)
struct thread *td;
register struct sysarch_args *uap;
{
- int error;
+ int error = 0;
+ struct pcb *pcb = curthread->td_pcb;
switch(uap->op) {
+ case AMD64_GET_FSBASE:
+ error = copyout(&pcb->pcb_fsbase, uap->parms, sizeof(pcb->pcb_fsbase));
+ break;
+
+ case AMD64_SET_FSBASE:
+ error = copyin(uap->parms, &pcb->pcb_fsbase, sizeof(pcb->pcb_fsbase));
+ if (!error)
+ wrmsr(MSR_FSBASE, pcb->pcb_fsbase);
+ break;
+
+ case AMD64_GET_GSBASE:
+ error = copyout(&pcb->pcb_gsbase, uap->parms, sizeof(pcb->pcb_gsbase));
+ break;
+
+ case AMD64_SET_GSBASE:
+ error = copyin(uap->parms, &pcb->pcb_gsbase, sizeof(pcb->pcb_gsbase));
+ if (!error)
+ wrmsr(MSR_KGSBASE, pcb->pcb_gsbase);
+ break;
+
default:
error = EINVAL;
break;
diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h
index 551bf8f..c17709b 100644
--- a/sys/amd64/include/pcb.h
+++ b/sys/amd64/include/pcb.h
@@ -57,6 +57,8 @@ struct pcb {
register_t pcb_rbx;
register_t pcb_rip;
register_t pcb_rflags;
+ register_t pcb_fsbase;
+ register_t pcb_gsbase;
struct savefpu pcb_save;
u_long pcb_flags;
diff --git a/sys/amd64/include/sysarch.h b/sys/amd64/include/sysarch.h
index c33f7b8..f14ee31 100644
--- a/sys/amd64/include/sysarch.h
+++ b/sys/amd64/include/sysarch.h
@@ -34,9 +34,27 @@
*/
/*
- * Architecture specific syscalls (i386)
+ * Architecture specific syscalls (AMD64)
*/
#ifndef _MACHINE_SYSARCH_H_
#define _MACHINE_SYSARCH_H_
+#define AMD64_GET_FSBASE 0
+#define AMD64_SET_FSBASE 1
+#define AMD64_GET_GSBASE 2
+#define AMD64_SET_GSBASE 3
+
+#if 0 /* these wrappers need to be implemented in libc first */
+#ifndef _KERNEL
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+unsigned long amd64_get_fsbase(void);
+unsigned long amd64_set_fsbase(unsigned long);
+unsigned long amd64_get_gsbase(void);
+unsigned long amd64_set_gsbase(unsigned long);
+__END_DECLS
+#endif
+#endif
+
#endif /* !_MACHINE_SYSARCH_H_ */
diff --git a/sys/amd64/isa/icu_vector.S b/sys/amd64/isa/icu_vector.S
index c9fea1a..d778f3f 100644
--- a/sys/amd64/isa/icu_vector.S
+++ b/sys/amd64/isa/icu_vector.S
@@ -25,7 +25,10 @@
SUPERALIGN_TEXT ; \
IDTVEC(vec_name) ; \
subq $TF_RIP,%rsp ; /* skip dummy tf_err and tf_trapno */ \
- movq %rdi,TF_RDI(%rsp) ; \
+ testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? */ \
+ jz 1f ; /* Yes, don't swapgs again */ \
+ swapgs ; \
+1: movq %rdi,TF_RDI(%rsp) ; \
movq %rsi,TF_RSI(%rsp) ; \
movq %rdx,TF_RDX(%rsp) ; \
movq %rcx,TF_RCX(%rsp) ; \
@@ -69,7 +72,10 @@ IDTVEC(vec_name) ; \
SUPERALIGN_TEXT ; \
IDTVEC(vec_name) ; \
subq $TF_RIP,%rsp ; /* skip dummy tf_err and tf_trapno */ \
- movq %rdi,TF_RDI(%rsp) ; \
+ testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? */ \
+ jz 1f ; /* Yes, don't swapgs again */ \
+ swapgs ; \
+1: movq %rdi,TF_RDI(%rsp) ; \
movq %rsi,TF_RSI(%rsp) ; \
movq %rdx,TF_RDX(%rsp) ; \
movq %rcx,TF_RCX(%rsp) ; \
OpenPOWER on IntegriCloud