diff options
-rw-r--r-- | sys/amd64/amd64/cpu_switch.S | 76 | ||||
-rw-r--r-- | sys/amd64/amd64/exception.S | 111 | ||||
-rw-r--r-- | sys/amd64/amd64/genassym.c | 5 | ||||
-rw-r--r-- | sys/amd64/amd64/machdep.c | 52 | ||||
-rw-r--r-- | sys/amd64/amd64/sys_machdep.c | 26 | ||||
-rw-r--r-- | sys/amd64/include/pcb.h | 2 | ||||
-rw-r--r-- | sys/amd64/include/sysarch.h | 20 | ||||
-rw-r--r-- | sys/amd64/isa/icu_vector.S | 10 |
8 files changed, 211 insertions, 91 deletions
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S index 8cca838..d5976d4 100644 --- a/sys/amd64/amd64/cpu_switch.S +++ b/sys/amd64/amd64/cpu_switch.S @@ -92,27 +92,39 @@ ENTRY(cpu_switch) jz badsw2 /* no, panic */ #endif - movq TD_PCB(%rdi),%rdx + movq TD_PCB(%rdi),%r8 movq (%rsp),%rax /* Hardware registers */ - movq %rax,PCB_RIP(%rdx) - movq %rbx,PCB_RBX(%rdx) - movq %rsp,PCB_RSP(%rdx) - movq %rbp,PCB_RBP(%rdx) - movq %r12,PCB_R12(%rdx) - movq %r13,PCB_R13(%rdx) - movq %r14,PCB_R14(%rdx) - movq %r15,PCB_R15(%rdx) + movq %rax,PCB_RIP(%r8) + movq %rbx,PCB_RBX(%r8) + movq %rsp,PCB_RSP(%r8) + movq %rbp,PCB_RBP(%r8) + movq %r12,PCB_R12(%r8) + movq %r13,PCB_R13(%r8) + movq %r14,PCB_R14(%r8) + movq %r15,PCB_R15(%r8) pushfq /* PSL */ - popq PCB_RFLAGS(%rdx) + popq PCB_RFLAGS(%r8) + + /* Save userland %fs */ + movl $MSR_FSBASE,%ecx + rdmsr + movl %eax,PCB_FSBASE(%r8) + movl %edx,PCB_FSBASE+4(%r8) + + /* Save userland %gs */ + movl $MSR_KGSBASE,%ecx + rdmsr + movl %eax,PCB_GSBASE(%r8) + movl %edx,PCB_GSBASE+4(%r8) /* have we used fp, and need a save? */ cmpq %rdi,PCPU(FPCURTHREAD) jne 1f pushq %rdi pushq %rsi - addq $PCB_SAVEFPU,%rdx /* h/w bugs make saving complicated */ - movq %rdx, %rdi + addq $PCB_SAVEFPU,%r8 /* h/w bugs make saving complicated */ + movq %r8, %rdi call npxsave /* do it in a big C function */ popq %rsi popq %rdi @@ -123,12 +135,12 @@ ENTRY(cpu_switch) testq %rsi,%rsi /* no thread? */ jz badsw3 /* no, panic */ #endif - movq TD_PCB(%rsi),%rdx + movq TD_PCB(%rsi),%r8 xorq %rax, %rax movl PCPU(CPUID), %eax /* switch address space */ - movq PCB_CR3(%rdx),%rdx + movq PCB_CR3(%r8),%rdx movq %rdx,%cr3 /* new address space */ /* Release bit from old pmap->pm_active */ @@ -146,26 +158,38 @@ sw1: * At this point, we've switched address spaces and are ready * to load up the rest of the next context. 
*/ - movq TD_PCB(%rsi),%rdx + movq TD_PCB(%rsi),%r8 + + /* Restore userland %fs */ + movl $MSR_FSBASE,%ecx + movl PCB_FSBASE(%r8),%eax + movl PCB_FSBASE+4(%r8),%edx + wrmsr + + /* Restore userland %gs */ + movl $MSR_KGSBASE,%ecx + movl PCB_GSBASE(%r8),%eax + movl PCB_GSBASE+4(%r8),%edx + wrmsr /* Update the TSS_RSP0 pointer for the next interrupt */ - leaq -16(%rdx), %rbx + leaq -16(%r8), %rbx movq %rbx, common_tss + COMMON_TSS_RSP0 /* Restore context. */ - movq PCB_RBX(%rdx),%rbx - movq PCB_RSP(%rdx),%rsp - movq PCB_RBP(%rdx),%rbp - movq PCB_R12(%rdx),%r12 - movq PCB_R13(%rdx),%r13 - movq PCB_R14(%rdx),%r14 - movq PCB_R15(%rdx),%r15 - movq PCB_RIP(%rdx),%rax + movq PCB_RBX(%r8),%rbx + movq PCB_RSP(%r8),%rsp + movq PCB_RBP(%r8),%rbp + movq PCB_R12(%r8),%r12 + movq PCB_R13(%r8),%r13 + movq PCB_R14(%r8),%r14 + movq PCB_R15(%r8),%r15 + movq PCB_RIP(%r8),%rax movq %rax,(%rsp) - pushq PCB_RFLAGS(%rdx) + pushq PCB_RFLAGS(%r8) popfq - movq %rdx, PCPU(CURPCB) + movq %r8, PCPU(CURPCB) movq %rsi, PCPU(CURTHREAD) /* into next thread */ ret diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S index 9b20068..e36bcb1 100644 --- a/sys/amd64/amd64/exception.S +++ b/sys/amd64/amd64/exception.S @@ -51,16 +51,16 @@ /* * Trap and fault vector routines. * - * Most traps are 'trap gates', SDT_SYS386TGT. A trap gate pushes state on - * the stack that mostly looks like an interrupt, but does not disable - * interrupts. A few of the traps we are use are interrupt gates, - * SDT_SYS386IGT, which are nearly the same thing except interrupts are - * disabled on entry. + * All traps are 'interrupt gates', SDT_SYSIGT. An interrupt gate pushes + * state on the stack but also disables interrupts. This is important for + * us for the use of the swapgs instruction. We cannot be interrupted + * until the GS.base value is correct. For most traps, we automatically + * then enable interrupts if the interrupted context had them enabled. 
+ * This is equivalent to the i386 port's use of SDT_SYS386TGT. * * The cpu will push a certain amount of state onto the kernel stack for - * the current process. The amount of state depends on the type of trap - * and whether the trap crossed rings or not. See i386/include/frame.h. - * At the very least the current EFLAGS (status register, which includes + * the current process. See amd64/include/frame.h. + * This includes the current RFLAGS (status register, which includes * the interrupt disable state prior to the trap), the code segment register, * and the return instruction pointer are pushed by the cpu. The cpu * will also push an 'error' code for certain traps. We push a dummy @@ -75,6 +75,7 @@ #define IDTVEC(name) ALIGN_TEXT; .globl __CONCAT(X,name); \ .type __CONCAT(X,name),@function; __CONCAT(X,name): #define TRAP(a) pushq $(a) ; jmp alltraps +#define TRAP_NOEN(a) pushq $(a) ; jmp alltraps_noen MCOUNT_LABEL(user) MCOUNT_LABEL(btrap) @@ -82,11 +83,11 @@ MCOUNT_LABEL(btrap) IDTVEC(div) pushq $0; TRAP(T_DIVIDE) IDTVEC(dbg) - pushq $0; TRAP(T_TRCTRAP) + pushq $0; TRAP_NOEN(T_TRCTRAP) IDTVEC(nmi) pushq $0; TRAP(T_NMI) IDTVEC(bpt) - pushq $0; TRAP(T_BPTFLT) + pushq $0; TRAP_NOEN(T_BPTFLT) IDTVEC(ofl) pushq $0; TRAP(T_OFLOW) IDTVEC(bnd) @@ -106,7 +107,7 @@ IDTVEC(stk) IDTVEC(prot) TRAP(T_PROTFLT) IDTVEC(page) - TRAP(T_PAGEFLT) + TRAP_NOEN(T_PAGEFLT) IDTVEC(mchk) pushq $0; TRAP(T_MCHK) IDTVEC(rsvd) @@ -119,10 +120,9 @@ IDTVEC(xmm) pushq $0; TRAP(T_XMMFLT) /* - * alltraps entry point. Interrupts are enabled if this was a trap - * gate (TGT), else disabled if this was an interrupt gate (IGT). - * Note that int0x80_syscall is a trap gate. Only page faults - * use an interrupt gate. + * alltraps entry point. Use swapgs if this is the first time in the + * kernel from userland. Reenable interrupts if they were enabled + * before the trap. This approximates SDT_SYS386TGT on the i386 port. 
*/ SUPERALIGN_TEXT @@ -130,6 +130,14 @@ IDTVEC(xmm) .type alltraps,@function alltraps: subq $TF_TRAPNO,%rsp /* tf_err and tf_trapno already pushed */ + testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ + jz alltraps_testi /* already running with kernel GS.base */ + swapgs +alltraps_testi: + testl $PSL_I,TF_RFLAGS(%rsp) + jz alltraps_pushregs + sti +alltraps_pushregs: movq %rdi,TF_RDI(%rsp) movq %rsi,TF_RSI(%rsp) movq %rdx,TF_RDX(%rsp) @@ -153,22 +161,43 @@ calltrap: MEXITCOUNT jmp doreti /* Handle any pending ASTs */ + /* + * alltraps_noen entry point. Unlike alltraps above, we want to + * leave the interrupts disabled. This corresponds to + * SDT_SYS386IGT on the i386 port. + */ + SUPERALIGN_TEXT + .globl alltraps_noen + .type alltraps_noen,@function +alltraps_noen: + subq $TF_TRAPNO,%rsp /* tf_err and tf_trapno already pushed */ + testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ + jz alltraps_pushregs /* already running with kernel GS.base */ + swapgs + jmp alltraps_pushregs + +IDTVEC(dblfault) + pushq $T_DOUBLEFLT + subq $TF_TRAPNO,%rsp /* tf_err and tf_trapno already pushed */ + testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ + jz 1f /* already running with kernel GS.base */ + swapgs +1: call dblfault_handler +2: hlt + jmp 2b + /* * Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80) * - * Even though the name says 'int0x80', this is actually a TGT (trap gate) - * rather then an IGT (interrupt gate). Thus interrupts are enabled on - * entry just as they are for a normal syscall. - * - * This leaves a place to put eflags so that the call frame can be - * converted to a trap frame. Note that the eflags is (semi-)bogusly - * pushed into (what will be) tf_err and then copied later into the - * final spot. It has to be done this way because esp can't be just - * temporarily altered for the pushfl - an interrupt might come in - * and clobber the saved cs/eip. 
+ * This is a SDT_SYSIDT entry point (unlike the i386 port) so that we + * can do a swapgs before enabling interrupts. This is critical because + * if we took an interrupt before swapgs, the interrupt code would see + * that it originated in supervisor mode and skip the swapgs. */ SUPERALIGN_TEXT IDTVEC(int0x80_syscall) + swapgs + sti pushq $2 /* sizeof "int 0x80" */ subq $TF_ERR,%rsp /* skip over tf_trapno */ movq %rdi,TF_RDI(%rsp) @@ -196,19 +225,21 @@ IDTVEC(int0x80_syscall) * and the new privilige level. We are still running on the old user stack * pointer. We have to juggle a few things around to find our stack etc. * swapgs gives us access to our PCPU space only. - * XXX The PCPU stuff is stubbed out right now... */ IDTVEC(fast_syscall) - /* XXX swapgs */ + swapgs movq %rsp,PCPU(SCRATCH_RSP) movq common_tss+COMMON_TSS_RSP0,%rsp /* Now emulate a trapframe. Ugh. */ subq $TF_SIZE,%rsp - movq $KUDSEL,TF_SS(%rsp) /* defer TF_RSP till we have a spare register */ movq %r11,TF_RFLAGS(%rsp) - movq $KUCSEL,TF_CS(%rsp) movq %rcx,TF_RIP(%rsp) /* %rcx original value is in %r10 */ + movq PCPU(SCRATCH_RSP),%r11 /* %r11 already saved */ + movq %r11,TF_RSP(%rsp) /* user stack pointer */ + sti + movq $KUDSEL,TF_SS(%rsp) + movq $KUCSEL,TF_CS(%rsp) movq $2,TF_ERR(%rsp) movq %rdi,TF_RDI(%rsp) /* arg 1 */ movq %rsi,TF_RSI(%rsp) /* arg 2 */ @@ -223,14 +254,10 @@ IDTVEC(fast_syscall) movq %r13,TF_R13(%rsp) /* C preserved */ movq %r14,TF_R14(%rsp) /* C preserved */ movq %r15,TF_R15(%rsp) /* C preserved */ - movq PCPU(SCRATCH_RSP),%r12 /* %r12 already saved */ - movq %r12,TF_RSP(%rsp) /* user stack pointer */ - sti call syscall movq PCPU(CURPCB),%rax testq $PCB_FULLCTX,PCB_FLAGS(%rax) jne 3f - /* simplified from doreti */ 1: /* Check for and handle AST's on return to userland */ cli movq PCPU(CURTHREAD),%rax @@ -255,7 +282,7 @@ IDTVEC(fast_syscall) movq TF_RIP(%rsp),%rcx /* original %rip */ movq TF_RSP(%rsp),%r9 /* user stack pointer */ movq %r9,%rsp /* original %rsp */ - /* XXX 
swapgs */ + swapgs sysretq 3: /* Requested full context restore, use doreti for that */ andq $~PCB_FULLCTX,PCB_FLAGS(%rax) @@ -344,12 +371,16 @@ doreti_exit: movq TF_R13(%rsp),%r13 movq TF_R14(%rsp),%r14 movq TF_R15(%rsp),%r15 - addq $TF_RIP,%rsp /* skip over tf_err, tf_trapno */ + testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ + jz 1f /* keep running with kernel GS.base */ + cli + swapgs +1: addq $TF_RIP,%rsp /* skip over tf_err, tf_trapno */ .globl doreti_iret doreti_iret: iretq - /* + /* * doreti_iret_fault and friends. Alternative return code for * the case where we get a fault in the doreti_exit code * above. trap() (i386/i386/trap.c) catches this specific @@ -360,7 +391,13 @@ doreti_iret: .globl doreti_iret_fault doreti_iret_fault: subq $TF_RIP,%rsp /* space including tf_err, tf_trapno */ - movq %rdi,TF_RDI(%rsp) + testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ + jz 1f /* already running with kernel GS.base */ + swapgs +1: testl $PSL_I,TF_RFLAGS(%rsp) + jz 2f + sti +2: movq %rdi,TF_RDI(%rsp) movq %rsi,TF_RSI(%rsp) movq %rdx,TF_RDX(%rsp) movq %rcx,TF_RCX(%rsp) diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c index cab9eab..f828e4d 100644 --- a/sys/amd64/amd64/genassym.c +++ b/sys/amd64/amd64/genassym.c @@ -70,6 +70,7 @@ #include <machine/cpu.h> #include <machine/sigframe.h> #include <machine/proc.h> +#include <machine/specialreg.h> ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace)); ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap)); @@ -122,6 +123,8 @@ ASSYM(PCB_RSP, offsetof(struct pcb, pcb_rsp)); ASSYM(PCB_RBX, offsetof(struct pcb, pcb_rbx)); ASSYM(PCB_RIP, offsetof(struct pcb, pcb_rip)); ASSYM(PCB_RFLAGS, offsetof(struct pcb, pcb_rflags)); +ASSYM(PCB_FSBASE, offsetof(struct pcb, pcb_fsbase)); +ASSYM(PCB_GSBASE, offsetof(struct pcb, pcb_gsbase)); ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags)); ASSYM(PCB_FULLCTX, PCB_FULLCTX); @@ -178,6 +181,8 @@ ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL)); ASSYM(KUCSEL, 
GSEL(GUCODE_SEL, SEL_UPL)); ASSYM(KUDSEL, GSEL(GUDATA_SEL, SEL_UPL)); +ASSYM(MSR_FSBASE, MSR_FSBASE); +ASSYM(MSR_KGSBASE, MSR_KGSBASE); ASSYM(GPROC0_SEL, GPROC0_SEL); ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock)); diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 7b2d462..d3890de0 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -465,6 +465,12 @@ exec_setregs(td, entry, stack, ps_strings) u_long ps_strings; { struct trapframe *regs = td->td_frame; + struct pcb *pcb = td->td_pcb; + + pcb->pcb_fsbase = 0; + pcb->pcb_gsbase = 0; + wrmsr(MSR_FSBASE, 0); + wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = entry; @@ -654,7 +660,7 @@ extern inthand_t IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), - IDTVEC(xmm), IDTVEC(int0x80_syscall), + IDTVEC(xmm), IDTVEC(dblfault), IDTVEC(int0x80_syscall), IDTVEC(fast_syscall), IDTVEC(fast_syscall32); void @@ -1182,9 +1188,9 @@ hammer_time(void) lgdt(&r_gdt); pc = &__pcpu; - wrmsr(MSR_FSBASE, (u_int64_t)pc); + wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); - wrmsr(MSR_KGSBASE, (u_int64_t)pc); + wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ pcpu_init(pc, 0, sizeof(struct pcpu)); PCPU_SET(prvspace, pc); @@ -1204,28 +1210,28 @@ hammer_time(void) /* exceptions */ for (x = 0; x < NIDT; x++) - setidt(x, &IDTVEC(rsvd), SDT_SYSTGT, SEL_KPL, 0); - setidt(0, &IDTVEC(div), SDT_SYSTGT, SEL_KPL, 0); + setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); + setidt(0, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0); setidt(1, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0); - setidt(2, &IDTVEC(nmi), SDT_SYSTGT, SEL_KPL, 0); + setidt(2, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 0); setidt(3, &IDTVEC(bpt), SDT_SYSIGT, SEL_KPL, 0); - setidt(4, &IDTVEC(ofl), SDT_SYSTGT, SEL_KPL, 
0); - setidt(5, &IDTVEC(bnd), SDT_SYSTGT, SEL_KPL, 0); - setidt(6, &IDTVEC(ill), SDT_SYSTGT, SEL_KPL, 0); - setidt(7, &IDTVEC(dna), SDT_SYSTGT, SEL_KPL, 0); - setidt(8, (inthand_t *)dblfault_handler, SDT_SYSIGT, SEL_KPL, 1); - setidt(9, &IDTVEC(fpusegm), SDT_SYSTGT, SEL_KPL, 0); - setidt(10, &IDTVEC(tss), SDT_SYSTGT, SEL_KPL, 0); - setidt(11, &IDTVEC(missing), SDT_SYSTGT, SEL_KPL, 0); - setidt(12, &IDTVEC(stk), SDT_SYSTGT, SEL_KPL, 0); - setidt(13, &IDTVEC(prot), SDT_SYSTGT, SEL_KPL, 0); + setidt(4, &IDTVEC(ofl), SDT_SYSIGT, SEL_KPL, 0); + setidt(5, &IDTVEC(bnd), SDT_SYSIGT, SEL_KPL, 0); + setidt(6, &IDTVEC(ill), SDT_SYSIGT, SEL_KPL, 0); + setidt(7, &IDTVEC(dna), SDT_SYSIGT, SEL_KPL, 0); + setidt(8, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1); + setidt(9, &IDTVEC(fpusegm), SDT_SYSIGT, SEL_KPL, 0); + setidt(10, &IDTVEC(tss), SDT_SYSIGT, SEL_KPL, 0); + setidt(11, &IDTVEC(missing), SDT_SYSIGT, SEL_KPL, 0); + setidt(12, &IDTVEC(stk), SDT_SYSIGT, SEL_KPL, 0); + setidt(13, &IDTVEC(prot), SDT_SYSIGT, SEL_KPL, 0); setidt(14, &IDTVEC(page), SDT_SYSIGT, SEL_KPL, 0); - setidt(15, &IDTVEC(rsvd), SDT_SYSTGT, SEL_KPL, 0); - setidt(16, &IDTVEC(fpu), SDT_SYSTGT, SEL_KPL, 0); - setidt(17, &IDTVEC(align), SDT_SYSTGT, SEL_KPL, 0); - setidt(18, &IDTVEC(mchk), SDT_SYSTGT, SEL_KPL, 0); - setidt(19, &IDTVEC(xmm), SDT_SYSTGT, SEL_KPL, 0); - setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYSTGT, SEL_UPL, 0); + setidt(15, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); + setidt(16, &IDTVEC(fpu), SDT_SYSIGT, SEL_KPL, 0); + setidt(17, &IDTVEC(align), SDT_SYSIGT, SEL_KPL, 0); + setidt(18, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 0); + setidt(19, &IDTVEC(xmm), SDT_SYSIGT, SEL_KPL, 0); + setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYSIGT, SEL_UPL, 0); r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (long) idt; @@ -1251,8 +1257,6 @@ hammer_time(void) /* make an initial tss so cpu can get interrupt stack on syscall! 
*/ common_tss.tss_rsp0 = thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb); - /* XXX we need to update tss_rsp0 in cpu_switch */ - /* XXX maybe not yet, everything is still running in supervisor mode */ /* doublefault stack space, runs on ist1 */ common_tss.tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)]; diff --git a/sys/amd64/amd64/sys_machdep.c b/sys/amd64/amd64/sys_machdep.c index c815a59..a9ed7a1 100644 --- a/sys/amd64/amd64/sys_machdep.c +++ b/sys/amd64/amd64/sys_machdep.c @@ -40,6 +40,9 @@ #include <sys/lock.h> #include <sys/proc.h> #include <sys/sysproto.h> +#include <machine/specialreg.h> +#include <machine/sysarch.h> +#include <machine/pcb.h> #ifndef _SYS_SYSPROTO_H_ struct sysarch_args { @@ -53,9 +56,30 @@ sysarch(td, uap) struct thread *td; register struct sysarch_args *uap; { - int error; + int error = 0; + struct pcb *pcb = curthread->td_pcb; switch(uap->op) { + case AMD64_GET_FSBASE: + error = copyout(&pcb->pcb_fsbase, uap->parms, sizeof(pcb->pcb_fsbase)); + break; + + case AMD64_SET_FSBASE: + error = copyin(uap->parms, &pcb->pcb_fsbase, sizeof(pcb->pcb_fsbase)); + if (!error) + wrmsr(MSR_FSBASE, pcb->pcb_fsbase); + break; + + case AMD64_GET_GSBASE: + error = copyout(&pcb->pcb_gsbase, uap->parms, sizeof(pcb->pcb_gsbase)); + break; + + case AMD64_SET_GSBASE: + error = copyin(uap->parms, &pcb->pcb_gsbase, sizeof(pcb->pcb_gsbase)); + if (!error) + wrmsr(MSR_KGSBASE, pcb->pcb_gsbase); + break; + default: error = EINVAL; break; diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h index 551bf8f..c17709b 100644 --- a/sys/amd64/include/pcb.h +++ b/sys/amd64/include/pcb.h @@ -57,6 +57,8 @@ struct pcb { register_t pcb_rbx; register_t pcb_rip; register_t pcb_rflags; + register_t pcb_fsbase; + register_t pcb_gsbase; struct savefpu pcb_save; u_long pcb_flags; diff --git a/sys/amd64/include/sysarch.h b/sys/amd64/include/sysarch.h index c33f7b8..f14ee31 100644 --- a/sys/amd64/include/sysarch.h +++ b/sys/amd64/include/sysarch.h @@ 
-34,9 +34,27 @@ */ /* - * Architecture specific syscalls (i386) + * Architecture specific syscalls (AMD64) */ #ifndef _MACHINE_SYSARCH_H_ #define _MACHINE_SYSARCH_H_ +#define AMD64_GET_FSBASE 0 +#define AMD64_SET_FSBASE 1 +#define AMD64_GET_GSBASE 2 +#define AMD64_SET_GSBASE 3 + +#if 0 /* these wrappers need to be implemented in libc first */ +#ifndef _KERNEL +#include <sys/cdefs.h> + +__BEGIN_DECLS +unsigned long amd64_get_fsbase(void); +unsigned long amd64_set_fsbase(unsigned long); +unsigned long amd64_get_gsbase(void); +unsigned long amd64_set_gsbase(unsigned long); +__END_DECLS +#endif +#endif + #endif /* !_MACHINE_SYSARCH_H_ */ diff --git a/sys/amd64/isa/icu_vector.S b/sys/amd64/isa/icu_vector.S index c9fea1a..d778f3f 100644 --- a/sys/amd64/isa/icu_vector.S +++ b/sys/amd64/isa/icu_vector.S @@ -25,7 +25,10 @@ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ subq $TF_RIP,%rsp ; /* skip dummy tf_err and tf_trapno */ \ - movq %rdi,TF_RDI(%rsp) ; \ + testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? */ \ + jz 1f ; /* Yes, dont swapgs again */ \ + swapgs ; \ +1: movq %rdi,TF_RDI(%rsp) ; \ movq %rsi,TF_RSI(%rsp) ; \ movq %rdx,TF_RDX(%rsp) ; \ movq %rcx,TF_RCX(%rsp) ; \ @@ -69,7 +72,10 @@ IDTVEC(vec_name) ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ subq $TF_RIP,%rsp ; /* skip dummy tf_err and tf_trapno */ \ - movq %rdi,TF_RDI(%rsp) ; \ + testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel? */ \ + jz 1f ; /* Yes, dont swapgs again */ \ + swapgs ; \ +1: movq %rdi,TF_RDI(%rsp) ; \ movq %rsi,TF_RSI(%rsp) ; \ movq %rdx,TF_RDX(%rsp) ; \ movq %rcx,TF_RCX(%rsp) ; \ |