diff options
Diffstat (limited to 'sys/amd64')
25 files changed, 602 insertions, 228 deletions
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S index ff47afb..952539f 100644 --- a/sys/amd64/amd64/cpu_switch.S +++ b/sys/amd64/amd64/cpu_switch.S @@ -87,7 +87,6 @@ END(cpu_throw) ENTRY(cpu_switch) /* Switch to new thread. First, save context. */ movq TD_PCB(%rdi),%r8 - orl $PCB_FULL_IRET,PCB_FLAGS(%r8) movq (%rsp),%rax /* Hardware registers */ movq %r15,PCB_R15(%r8) @@ -99,6 +98,30 @@ ENTRY(cpu_switch) movq %rbx,PCB_RBX(%r8) movq %rax,PCB_RIP(%r8) + testl $PCB_FULL_IRET,PCB_FLAGS(%r8) + jnz 2f + orl $PCB_FULL_IRET,PCB_FLAGS(%r8) + testl $TDP_KTHREAD,TD_PFLAGS(%rdi) + jnz 2f + testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) + jz 2f + movl %fs,%eax + cmpl $KUF32SEL,%eax + jne 1f + rdfsbase %rax + movq %rax,PCB_FSBASE(%r8) +1: movl %gs,%eax + cmpl $KUG32SEL,%eax + jne 2f + movq %rdx,%r12 + movl $MSR_KGSBASE,%ecx /* Read user gs base */ + rdmsr + shlq $32,%rdx + orq %rdx,%rax + movq %rax,PCB_GSBASE(%r8) + movq %r12,%rdx + +2: testl $PCB_DBREGS,PCB_FLAGS(%r8) jnz store_dr /* static predict not taken */ done_store_dr: @@ -149,7 +172,7 @@ sw1: * to load up the rest of the next context. */ - /* Skip loading user fsbase/gsbase for kthreads */ + /* Skip loading LDT and user fsbase/gsbase for kthreads */ testl $TDP_KTHREAD,TD_PFLAGS(%r12) jnz do_kthread diff --git a/sys/amd64/amd64/elf_machdep.c b/sys/amd64/amd64/elf_machdep.c index ca07adc..4e9476a 100644 --- a/sys/amd64/amd64/elf_machdep.c +++ b/sys/amd64/amd64/elf_machdep.c @@ -84,6 +84,25 @@ struct sysentvec elf64_freebsd_sysvec = { }; INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec); +void +amd64_lower_shared_page(struct sysentvec *sv) +{ + if (hw_lower_amd64_sharedpage != 0) { + sv->sv_maxuser -= PAGE_SIZE; + sv->sv_shared_page_base -= PAGE_SIZE; + sv->sv_usrstack -= PAGE_SIZE; + sv->sv_psstrings -= PAGE_SIZE; + } +} + +/* + * Do this fixup before INIT_SYSENTVEC (SI_ORDER_ANY) because the latter + * uses the value of sv_shared_page_base. 
+ */ +SYSINIT(elf64_sysvec_fixup, SI_SUB_EXEC, SI_ORDER_FIRST, + (sysinit_cfunc_t) amd64_lower_shared_page, + &elf64_freebsd_sysvec); + static Elf64_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_X86_64, diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S index 524e729..b89c2eb 100644 --- a/sys/amd64/amd64/exception.S +++ b/sys/amd64/amd64/exception.S @@ -388,8 +388,24 @@ prot_addrf: je 5f /* kernel but with user gsbase!! */ testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ jz 6f /* already running with kernel GS.base */ - swapgs + testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) + jz 2f + cmpw $KUF32SEL,TF_FS(%rsp) + jne 1f + rdfsbase %rax +1: cmpw $KUG32SEL,TF_GS(%rsp) + jne 2f + rdgsbase %rdx +2: swapgs movq PCPU(CURPCB),%rdi + testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) + jz 4f + cmpw $KUF32SEL,TF_FS(%rsp) + jne 3f + movq %rax,PCB_FSBASE(%rdi) +3: cmpw $KUG32SEL,TF_GS(%rsp) + jne 4f + movq %rdx,PCB_GSBASE(%rdi) 4: call handle_ibrs_entry orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) /* always full iret from GPF */ movw %es,TF_ES(%rsp) @@ -409,8 +425,8 @@ prot_addrf: * pointer. We have to juggle a few things around to find our stack etc. * swapgs gives us access to our PCPU space only. * - * We do not support invoking this from a custom %cs or %ss (e.g. using - * entries from an LDT). + * We do not support invoking this from a custom segment registers, + * esp. %cs, %ss, %fs, %gs, e.g. using entries from an LDT. 
*/ SUPERALIGN_TEXT IDTVEC(fast_syscall_pti) @@ -597,6 +613,19 @@ nmi_fromuserspace: testq %rdi,%rdi jz 3f orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) + testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) + jz 3f + cmpw $KUF32SEL,TF_FS(%rsp) + jne 2f + rdfsbase %rax + movq %rax,PCB_FSBASE(%rdi) +2: cmpw $KUG32SEL,TF_GS(%rsp) + jne 3f + movl $MSR_KGSBASE,%ecx + rdmsr + shlq $32,%rdx + orq %rdx,%rax + movq %rax,PCB_GSBASE(%rdi) 3: /* Note: this label is also used by ddb and gdb: */ nmi_calltrap: @@ -909,6 +938,7 @@ doreti_exit: jz ld_regs testl $PCB_FULL_IRET,PCB_FLAGS(%r8) jz ld_regs + andl $~PCB_FULL_IRET,PCB_FLAGS(%r8) testl $TF_HASSEGS,TF_FLAGS(%rsp) je set_segs diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c index 4c70e88..540966b 100644 --- a/sys/amd64/amd64/fpu.c +++ b/sys/amd64/amd64/fpu.c @@ -806,6 +806,7 @@ fpusetregs(struct thread *td, struct savefpu *addr, char *xfpustate, struct pcb *pcb; int error; + addr->sv_env.en_mxcsr &= cpu_mxcsr_mask; pcb = td->td_pcb; critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c index 2818111..de3e461 100644 --- a/sys/amd64/amd64/initcpu.c +++ b/sys/amd64/amd64/initcpu.c @@ -48,6 +48,11 @@ __FBSDID("$FreeBSD$"); static int hw_instruction_sse; SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD, &hw_instruction_sse, 0, "SIMD/MMX2 instructions available in CPU"); +static int lower_sharedpage_init; +int hw_lower_amd64_sharedpage; +SYSCTL_INT(_hw, OID_AUTO, lower_amd64_sharedpage, CTLFLAG_RDTUN, + &hw_lower_amd64_sharedpage, 0, + "Lower sharedpage to work around Ryzen issue with executing code near the top of user memory"); /* * -1: automatic (default) * 0: keep enable CLFLUSH @@ -122,6 +127,28 @@ init_amd(void) wrmsr(0xc0011020, msr); } } + + /* + * Work around a problem on Ryzen that is triggered by executing + * code near the top of user memory, in our case the signal + * trampoline code in the shared page on amd64. 
+ * + * This function is executed once for the BSP before tunables take + * effect so the value determined here can be overridden by the + * tunable. This function is then executed again for each AP and + * also on resume. Set a flag the first time so that value set by + * the tunable is not overwritten. + * + * The stepping and/or microcode versions should be checked after + * this issue is fixed by AMD so that we don't use this mode if not + * needed. + */ + if (lower_sharedpage_init == 0) { + lower_sharedpage_init = 1; + if (CPUID_TO_FAMILY(cpu_id) == 0x17) { + hw_lower_amd64_sharedpage = 1; + } + } } /* diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index dd5bb06..51b8433 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -382,6 +382,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); fpstate_drop(td); + update_pcb_bases(pcb); sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase; sf.sf_uc.uc_mcontext.mc_gsbase = pcb->pcb_gsbase; bzero(sf.sf_uc.uc_mcontext.mc_spare, @@ -452,7 +453,6 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; - set_pcb_flags(pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } @@ -558,6 +558,7 @@ sys_sigreturn(td, uap) return (ret); } bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs)); + update_pcb_bases(pcb); pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase; pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase; @@ -569,7 +570,6 @@ sys_sigreturn(td, uap) #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); - set_pcb_flags(pcb, PCB_FULL_IRET); return (EJUSTRETURN); } @@ -597,11 +597,11 @@ exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) else mtx_unlock(&dt_lock); + update_pcb_bases(pcb); pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; 
clear_pcb_flags(pcb, PCB_32BIT); pcb->pcb_initial_fpucw = __INITIAL_FPUCW__; - set_pcb_flags(pcb, PCB_FULL_IRET); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = imgp->entry_addr; @@ -1572,6 +1572,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) kmdp = init_ops.parse_preload_data(modulep); identify_cpu1(); + identify_hypervisor(); /* Init basic tunables, hz etc */ init_param1(); @@ -2193,6 +2194,7 @@ get_mcontext(struct thread *td, mcontext_t *mcp, int flags) mcp->mc_flags = tp->tf_flags; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp, NULL, 0); + update_pcb_bases(pcb); mcp->mc_fsbase = pcb->pcb_fsbase; mcp->mc_gsbase = pcb->pcb_gsbase; mcp->mc_xfpustate = 0; @@ -2263,11 +2265,11 @@ set_mcontext(struct thread *td, mcontext_t *mcp) tp->tf_fs = mcp->mc_fs; tp->tf_gs = mcp->mc_gs; } + set_pcb_flags(pcb, PCB_FULL_IRET); if (mcp->mc_flags & _MC_HASBASES) { pcb->pcb_fsbase = mcp->mc_fsbase; pcb->pcb_gsbase = mcp->mc_gsbase; } - set_pcb_flags(pcb, PCB_FULL_IRET); return (0); } @@ -2298,7 +2300,6 @@ static int set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, size_t xfpustate_len) { - struct savefpu *fpstate; int error; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) @@ -2311,9 +2312,8 @@ set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, error = 0; } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { - fpstate = (struct savefpu *)&mcp->mc_fpstate; - fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask; - error = fpusetregs(td, fpstate, xfpustate, xfpustate_len); + error = fpusetregs(td, (struct savefpu *)&mcp->mc_fpstate, + xfpustate, xfpustate_len); } else return (EINVAL); return (error); @@ -2538,6 +2538,71 @@ user_dbreg_trap(void) return 0; } +/* + * The pcb_flags is only modified by current thread, or by other threads + * when current thread is stopped. However, current thread may change it + * from the interrupt context in cpu_switch(), or in the trap handler. 
+ * When we read-modify-write pcb_flags from C sources, compiler may generate + * code that is not atomic regarding the interrupt handler. If a trap or + * interrupt happens and any flag is modified from the handler, it can be + * clobbered with the cached value later. Therefore, we implement setting + * and clearing flags with single-instruction functions, which do not race + * with possible modification of the flags from the trap or interrupt context, + * because traps and interrupts are executed only on instruction boundary. + */ +void +set_pcb_flags_raw(struct pcb *pcb, const u_int flags) +{ + + __asm __volatile("orl %1,%0" + : "=m" (pcb->pcb_flags) : "ir" (flags), "m" (pcb->pcb_flags) + : "cc", "memory"); + +} + +/* + * The support for RDFSBASE, WRFSBASE and similar instructions for %gs + * base requires that kernel saves MSR_FSBASE and MSR_{K,}GSBASE into + * pcb if user space modified the bases. We must save on the context + * switch or if the return to usermode happens through the doreti. + * + * Tracking of both events is performed by the pcb flag PCB_FULL_IRET, + * which have a consequence that the base MSRs must be saved each time + * the PCB_FULL_IRET flag is set. We disable interrupts to sync with + * context switches. 
+ */ +void +set_pcb_flags(struct pcb *pcb, const u_int flags) +{ + register_t r; + + if (curpcb == pcb && + (flags & PCB_FULL_IRET) != 0 && + (pcb->pcb_flags & PCB_FULL_IRET) == 0 && + (cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0) { + r = intr_disable(); + if ((pcb->pcb_flags & PCB_FULL_IRET) == 0) { + if (rfs() == _ufssel) + pcb->pcb_fsbase = rdfsbase(); + if (rgs() == _ugssel) + pcb->pcb_gsbase = rdmsr(MSR_KGSBASE); + } + set_pcb_flags_raw(pcb, flags); + intr_restore(r); + } else { + set_pcb_flags_raw(pcb, flags); + } +} + +void +clear_pcb_flags(struct pcb *pcb, const u_int flags) +{ + + __asm __volatile("andl %1,%0" + : "=m" (pcb->pcb_flags) : "ir" (~flags), "m" (pcb->pcb_flags) + : "cc", "memory"); +} + #ifdef KDB /* diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 2989eb40..64abe9b 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -2566,9 +2566,8 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags) /* * allocate the page directory page */ - while ((pml4pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | - VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) - VM_WAIT; + pml4pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | + VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_WAITOK); pml4phys = VM_PAGE_TO_PHYS(pml4pg); pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(pml4phys); diff --git a/sys/amd64/amd64/ptrace_machdep.c b/sys/amd64/amd64/ptrace_machdep.c index dbb3f13..169e15c 100644 --- a/sys/amd64/amd64/ptrace_machdep.c +++ b/sys/amd64/amd64/ptrace_machdep.c @@ -117,15 +117,17 @@ cpu_ptrace_xstate(struct thread *td, int req, void *addr, int data) static void cpu_ptrace_setbase(struct thread *td, int req, register_t r) { + struct pcb *pcb; + pcb = td->td_pcb; + set_pcb_flags(pcb, PCB_FULL_IRET); if (req == PT_SETFSBASE) { - td->td_pcb->pcb_fsbase = r; + pcb->pcb_fsbase = r; td->td_frame->tf_fs = _ufssel; } else { - td->td_pcb->pcb_gsbase = r; + pcb->pcb_gsbase = r; td->td_frame->tf_gs = _ugssel; } - 
set_pcb_flags(td->td_pcb, PCB_FULL_IRET); } #ifdef COMPAT_FREEBSD32 @@ -136,6 +138,7 @@ static int cpu32_ptrace(struct thread *td, int req, void *addr, int data) { struct savefpu *fpstate; + struct pcb *pcb; uint32_t r; int error; @@ -167,8 +170,10 @@ cpu32_ptrace(struct thread *td, int req, void *addr, int data) error = EINVAL; break; } - r = req == PT_GETFSBASE ? td->td_pcb->pcb_fsbase : - td->td_pcb->pcb_gsbase; + pcb = td->td_pcb; + if (td == curthread) + update_pcb_bases(pcb); + r = req == PT_GETFSBASE ? pcb->pcb_fsbase : pcb->pcb_gsbase; error = copyout(&r, addr, sizeof(r)); break; @@ -197,6 +202,7 @@ int cpu_ptrace(struct thread *td, int req, void *addr, int data) { register_t *r, rv; + struct pcb *pcb; int error; #ifdef COMPAT_FREEBSD32 @@ -221,8 +227,10 @@ cpu_ptrace(struct thread *td, int req, void *addr, int data) case PT_GETFSBASE: case PT_GETGSBASE: - r = req == PT_GETFSBASE ? &td->td_pcb->pcb_fsbase : - &td->td_pcb->pcb_gsbase; + pcb = td->td_pcb; + if (td == curthread) + update_pcb_bases(pcb); + r = req == PT_GETFSBASE ? 
&pcb->pcb_fsbase : &pcb->pcb_gsbase; error = copyout(r, addr, sizeof(*r)); break; diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S index f6be94e..aed23d6 100644 --- a/sys/amd64/amd64/support.S +++ b/sys/amd64/amd64/support.S @@ -51,7 +51,6 @@ ENTRY(bzero) movq %rsi,%rcx xorl %eax,%eax shrq $3,%rcx - cld rep stosq movq %rsi,%rcx @@ -77,7 +76,6 @@ ENTRY(bcmp) PUSH_FRAME_POINTER movq %rdx,%rcx shrq $3,%rcx - cld /* compare forwards */ repe cmpsq jne 1f @@ -109,7 +107,6 @@ ENTRY(bcopy) jb 1f shrq $3,%rcx /* copy by 64-bit words */ - cld /* nope, copy forwards */ rep movsq movq %rdx,%rcx @@ -148,7 +145,6 @@ ENTRY(memcpy) movq %rdi,%rax movq %rdx,%rcx shrq $3,%rcx /* copy by 64-bit words */ - cld /* copy forwards */ rep movsq movq %rdx,%rcx @@ -195,7 +191,6 @@ ENTRY(fillw) movq %rdi,%rax movq %rsi,%rdi movq %rdx,%rcx - cld rep stosw POP_FRAME_POINTER @@ -215,7 +210,7 @@ END(fillw) */ /* - * copyout(from_kernel, to_user, len) - MP SAFE + * copyout(from_kernel, to_user, len) * %rdi, %rsi, %rdx */ ENTRY(copyout) @@ -226,12 +221,11 @@ ENTRY(copyout) jz done_copyout /* - * Check explicitly for non-user addresses. If 486 write protection - * is being used, this check is essential because we are in kernel - * mode so the h/w does not provide any protection against writing - * kernel addresses. + * Check explicitly for non-user addresses. This check is essential + * because it prevents usermode from writing into the kernel. We do + * not verify anywhere else that the user did not specify a rogue + * address. */ - /* * First, prevent address wrapping. 
*/ @@ -253,7 +247,6 @@ ENTRY(copyout) movq %rdx,%rcx shrq $3,%rcx - cld rep movsq movb %dl,%cl @@ -278,7 +271,7 @@ copyout_fault: END(copyout) /* - * copyin(from_user, to_kernel, len) - MP SAFE + * copyin(from_user, to_kernel, len) * %rdi, %rsi, %rdx */ ENTRY(copyin) @@ -302,7 +295,6 @@ ENTRY(copyin) movq %rdx,%rcx movb %cl,%al shrq $3,%rcx /* copy longword-wise */ - cld rep movsq movb %al,%cl @@ -495,7 +487,7 @@ fusufault: /* * Store a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit byte to - * user memory. All these functions are MPSAFE. + * user memory. * addr = %rdi, value = %rsi */ ALTENTRY(suword64) @@ -570,7 +562,7 @@ ENTRY(subyte) END(subyte) /* - * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE + * copyinstr(from, to, maxlen, int *lencopied) * %rdi, %rsi, %rdx, %rcx * * copy a string from from to to, stop when a 0 character is reached. @@ -599,7 +591,6 @@ ENTRY(copyinstr) movq %rax,%r8 1: incq %rdx - cld 2: decq %rdx @@ -641,7 +632,7 @@ cpystrflt_x: END(copyinstr) /* - * copystr(from, to, maxlen, int *lencopied) - MP SAFE + * copystr(from, to, maxlen, int *lencopied) * %rdi, %rsi, %rdx, %rcx */ ENTRY(copystr) @@ -650,7 +641,6 @@ ENTRY(copystr) xchgq %rdi,%rsi incq %rdx - cld 1: decq %rdx jz 4f @@ -681,7 +671,6 @@ END(copystr) /* * Handling of special amd64 registers and descriptor tables etc - * %rdi */ /* void lgdt(struct region_descriptor *rdp); */ ENTRY(lgdt) diff --git a/sys/amd64/amd64/sys_machdep.c b/sys/amd64/amd64/sys_machdep.c index 8867aed..42cae4a 100644 --- a/sys/amd64/amd64/sys_machdep.c +++ b/sys/amd64/amd64/sys_machdep.c @@ -256,39 +256,45 @@ sysarch(td, uap) error = amd64_set_ioperm(td, &iargs); break; case I386_GET_FSBASE: + update_pcb_bases(pcb); i386base = pcb->pcb_fsbase; error = copyout(&i386base, uap->parms, sizeof(i386base)); break; case I386_SET_FSBASE: error = copyin(uap->parms, &i386base, sizeof(i386base)); if (!error) { + set_pcb_flags(pcb, PCB_FULL_IRET); pcb->pcb_fsbase = i386base; td->td_frame->tf_fs = _ufssel; 
update_gdt_fsbase(td, i386base); } break; case I386_GET_GSBASE: + update_pcb_bases(pcb); i386base = pcb->pcb_gsbase; error = copyout(&i386base, uap->parms, sizeof(i386base)); break; case I386_SET_GSBASE: error = copyin(uap->parms, &i386base, sizeof(i386base)); if (!error) { + set_pcb_flags(pcb, PCB_FULL_IRET); pcb->pcb_gsbase = i386base; td->td_frame->tf_gs = _ugssel; update_gdt_gsbase(td, i386base); } break; case AMD64_GET_FSBASE: - error = copyout(&pcb->pcb_fsbase, uap->parms, sizeof(pcb->pcb_fsbase)); + update_pcb_bases(pcb); + error = copyout(&pcb->pcb_fsbase, uap->parms, + sizeof(pcb->pcb_fsbase)); break; case AMD64_SET_FSBASE: error = copyin(uap->parms, &a64base, sizeof(a64base)); if (!error) { if (a64base < VM_MAXUSER_ADDRESS) { - pcb->pcb_fsbase = a64base; set_pcb_flags(pcb, PCB_FULL_IRET); + pcb->pcb_fsbase = a64base; td->td_frame->tf_fs = _ufssel; } else error = EINVAL; @@ -296,15 +302,17 @@ sysarch(td, uap) break; case AMD64_GET_GSBASE: - error = copyout(&pcb->pcb_gsbase, uap->parms, sizeof(pcb->pcb_gsbase)); + update_pcb_bases(pcb); + error = copyout(&pcb->pcb_gsbase, uap->parms, + sizeof(pcb->pcb_gsbase)); break; case AMD64_SET_GSBASE: error = copyin(uap->parms, &a64base, sizeof(a64base)); if (!error) { if (a64base < VM_MAXUSER_ADDRESS) { - pcb->pcb_gsbase = a64base; set_pcb_flags(pcb, PCB_FULL_IRET); + pcb->pcb_gsbase = a64base; td->td_frame->tf_gs = _ugssel; } else error = EINVAL; diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index fccd297..a553fc5 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -99,9 +99,8 @@ PMC_SOFT_DEFINE( , , page_fault, write); #include <sys/dtrace_bsd.h> #endif -extern void __noinline trap(struct trapframe *frame); -extern void trap_check(struct trapframe *frame); -extern void syscall(struct trapframe *frame); +void __noinline trap(struct trapframe *frame); +void trap_check(struct trapframe *frame); void dblfault_handler(struct trapframe *frame); static int trap_pfault(struct trapframe *, 
int); @@ -166,37 +165,41 @@ trap(struct trapframe *frame) #ifdef KDTRACE_HOOKS struct reg regs; #endif - struct thread *td = curthread; - struct proc *p = td->td_proc; + ksiginfo_t ksi; + struct thread *td; + struct proc *p; + register_t addr; #ifdef KDB register_t dr6; #endif - int i = 0, ucode = 0; + int signo, ucode; u_int type; - register_t addr = 0; - ksiginfo_t ksi; + + td = curthread; + p = td->td_proc; + signo = 0; + ucode = 0; + addr = 0; PCPU_INC(cnt.v_trap); type = frame->tf_trapno; #ifdef SMP /* Handler for NMI IPIs used for stopping CPUs. */ - if (type == T_NMI) { - if (ipi_nmi_handler() == 0) - goto out; - } -#endif /* SMP */ + if (type == T_NMI && ipi_nmi_handler() == 0) + return; +#endif #ifdef KDB if (kdb_active) { kdb_reenter(); - goto out; + return; } #endif if (type == T_RESERVED) { trap_fatal(frame, 0); - goto out; + return; } if (type == T_NMI) { @@ -209,12 +212,12 @@ trap(struct trapframe *frame) */ if (pmc_intr != NULL && (*pmc_intr)(PCPU_GET(cpuid), frame) != 0) - goto out; + return; #endif #ifdef STACK if (stack_nmi_handler(frame) != 0) - goto out; + return; #endif } @@ -259,7 +262,7 @@ trap(struct trapframe *frame) switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ - i = SIGILL; + signo = SIGILL; ucode = ILL_PRVOPC; break; @@ -271,41 +274,41 @@ trap(struct trapframe *frame) fill_frame_regs(frame, &regs); if (dtrace_pid_probe_ptr != NULL && dtrace_pid_probe_ptr(&regs) == 0) - goto out; + return; } #endif frame->tf_rflags &= ~PSL_T; - i = SIGTRAP; + signo = SIGTRAP; ucode = (type == T_TRCTRAP ? 
TRAP_TRACE : TRAP_BRKPT); break; case T_ARITHTRAP: /* arithmetic trap */ ucode = fputrap_x87(); if (ucode == -1) - goto userout; - i = SIGFPE; + return; + signo = SIGFPE; break; case T_PROTFLT: /* general protection fault */ - i = SIGBUS; + signo = SIGBUS; ucode = BUS_OBJERR; break; case T_STKFLT: /* stack fault */ case T_SEGNPFLT: /* segment not present fault */ - i = SIGBUS; + signo = SIGBUS; ucode = BUS_ADRERR; break; case T_TSSFLT: /* invalid TSS fault */ - i = SIGBUS; + signo = SIGBUS; ucode = BUS_OBJERR; break; case T_ALIGNFLT: - i = SIGBUS; + signo = SIGBUS; ucode = BUS_ADRALN; break; case T_DOUBLEFLT: /* double fault */ default: - i = SIGBUS; + signo = SIGBUS; ucode = BUS_OBJERR; break; @@ -315,67 +318,64 @@ trap(struct trapframe *frame) */ if (*p->p_sysent->sv_trap != NULL && (*p->p_sysent->sv_trap)(td) == 0) - goto userout; + return; addr = frame->tf_addr; - i = trap_pfault(frame, TRUE); - if (i == -1) - goto userout; - if (i == 0) - goto user; - - if (i == SIGSEGV) + signo = trap_pfault(frame, TRUE); + if (signo == -1) + return; + if (signo == 0) + goto userret; + if (signo == SIGSEGV) { ucode = SEGV_MAPERR; - else { - if (prot_fault_translation == 0) { - /* - * Autodetect. - * This check also covers the images - * without the ABI-tag ELF note. - */ - if (SV_CURPROC_ABI() == SV_ABI_FREEBSD - && p->p_osrel >= P_OSREL_SIGSEGV) { - i = SIGSEGV; - ucode = SEGV_ACCERR; - } else { - i = SIGBUS; - ucode = BUS_PAGE_FAULT; - } - } else if (prot_fault_translation == 1) { - /* - * Always compat mode. - */ - i = SIGBUS; - ucode = BUS_PAGE_FAULT; - } else { - /* - * Always SIGSEGV mode. - */ - i = SIGSEGV; + } else if (prot_fault_translation == 0) { + /* + * Autodetect. This check also covers + * the images without the ABI-tag ELF + * note. 
+ */ + if (SV_CURPROC_ABI() == SV_ABI_FREEBSD && + p->p_osrel >= P_OSREL_SIGSEGV) { + signo = SIGSEGV; ucode = SEGV_ACCERR; + } else { + signo = SIGBUS; + ucode = BUS_PAGE_FAULT; } + } else if (prot_fault_translation == 1) { + /* + * Always compat mode. + */ + signo = SIGBUS; + ucode = BUS_PAGE_FAULT; + } else { + /* + * Always SIGSEGV mode. + */ + signo = SIGSEGV; + ucode = SEGV_ACCERR; } break; case T_DIVIDE: /* integer divide fault */ ucode = FPE_INTDIV; - i = SIGFPE; + signo = SIGFPE; break; #ifdef DEV_ISA case T_NMI: nmi_handle_intr(type, frame); - break; -#endif /* DEV_ISA */ + return; +#endif case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF; - i = SIGFPE; + signo = SIGFPE; break; case T_BOUND: /* bounds check fault */ ucode = FPE_FLTSUB; - i = SIGFPE; + signo = SIGFPE; break; case T_DNA: @@ -383,27 +383,26 @@ trap(struct trapframe *frame) KASSERT(PCB_USER_FPU(td->td_pcb), ("kernel FPU ctx has leaked")); fpudna(); - goto userout; + return; case T_FPOPFLT: /* FPU operand fetch fault */ ucode = ILL_COPROC; - i = SIGILL; + signo = SIGILL; break; case T_XMMFLT: /* SIMD floating-point exception */ ucode = fputrap_sse(); if (ucode == -1) - goto userout; - i = SIGFPE; + return; + signo = SIGFPE; break; #ifdef KDTRACE_HOOKS case T_DTRACE_RET: enable_intr(); fill_frame_regs(frame, &regs); - if (dtrace_return_probe_ptr != NULL && - dtrace_return_probe_ptr(&regs) == 0) - goto out; - break; + if (dtrace_return_probe_ptr != NULL) + dtrace_return_probe_ptr(&regs); + return; #endif } } else { @@ -414,13 +413,13 @@ trap(struct trapframe *frame) switch (type) { case T_PAGEFLT: /* page fault */ (void) trap_pfault(frame, FALSE); - goto out; + return; case T_DNA: if (PCB_USER_FPU(td->td_pcb)) panic("Unregistered use of FPU in kernel"); fpudna(); - goto out; + return; case T_ARITHTRAP: /* arithmetic trap */ case T_XMMFLT: /* SIMD floating-point exception */ @@ -430,7 +429,7 @@ trap(struct trapframe *frame) * registration for FPU traps is overkill. 
*/ trap_fatal(frame, 0); - goto out; + return; case T_STKFLT: /* stack fault */ case T_PROTFLT: /* general protection fault */ @@ -469,35 +468,35 @@ trap(struct trapframe *frame) sizeof(register_t)) frame->tf_rsp = PCPU_GET(rsp0) - 5 * sizeof(register_t); - goto out; + return; } if (frame->tf_rip == (long)ld_ds) { frame->tf_rip = (long)ds_load_fault; - goto out; + return; } if (frame->tf_rip == (long)ld_es) { frame->tf_rip = (long)es_load_fault; - goto out; + return; } if (frame->tf_rip == (long)ld_fs) { frame->tf_rip = (long)fs_load_fault; - goto out; + return; } if (frame->tf_rip == (long)ld_gs) { frame->tf_rip = (long)gs_load_fault; - goto out; + return; } if (frame->tf_rip == (long)ld_gsbase) { frame->tf_rip = (long)gsbase_load_fault; - goto out; + return; } if (frame->tf_rip == (long)ld_fsbase) { frame->tf_rip = (long)fsbase_load_fault; - goto out; + return; } if (curpcb->pcb_onfault != NULL) { frame->tf_rip = (long)curpcb->pcb_onfault; - goto out; + return; } break; @@ -513,7 +512,7 @@ trap(struct trapframe *frame) */ if (frame->tf_rflags & PSL_NT) { frame->tf_rflags &= ~PSL_NT; - goto out; + return; } break; @@ -534,7 +533,7 @@ trap(struct trapframe *frame) * processor doesn't */ load_dr6(rdr6() & ~0xf); - goto out; + return; } /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) @@ -549,27 +548,27 @@ trap(struct trapframe *frame) dr6 = rdr6(); load_dr6(dr6 & ~0x4000); if (kdb_trap(type, dr6, frame)) - goto out; + return; #endif break; #ifdef DEV_ISA case T_NMI: nmi_handle_intr(type, frame); - goto out; -#endif /* DEV_ISA */ + return; +#endif } trap_fatal(frame, 0); - goto out; + return; } /* Translate fault for emulators (e.g. 
Linux) */ - if (*p->p_sysent->sv_transtrap) - i = (*p->p_sysent->sv_transtrap)(i, type); + if (*p->p_sysent->sv_transtrap != NULL) + signo = (*p->p_sysent->sv_transtrap)(signo, type); ksiginfo_init_trap(&ksi); - ksi.ksi_signo = i; + ksi.ksi_signo = signo; ksi.ksi_code = ucode; ksi.ksi_trapno = type; ksi.ksi_addr = (void *)addr; @@ -577,8 +576,8 @@ trap(struct trapframe *frame) uprintf("pid %d comm %s: signal %d err %lx code %d type %d " "addr 0x%lx rsp 0x%lx rip 0x%lx " "<%02x %02x %02x %02x %02x %02x %02x %02x>\n", - p->p_pid, p->p_comm, i, frame->tf_err, ucode, type, addr, - frame->tf_rsp, frame->tf_rip, + p->p_pid, p->p_comm, signo, frame->tf_err, ucode, type, + addr, frame->tf_rsp, frame->tf_rip, fubyte((void *)(frame->tf_rip + 0)), fubyte((void *)(frame->tf_rip + 1)), fubyte((void *)(frame->tf_rip + 2)), @@ -590,14 +589,10 @@ trap(struct trapframe *frame) } KASSERT((read_rflags() & PSL_I) != 0, ("interrupts disabled")); trapsignal(td, &ksi); - -user: +userret: userret(td, frame); KASSERT(PCB_USER_FPU(td->td_pcb), ("Return from trap with kernel FPU ctx leaked")); -userout: -out: - return; } /* @@ -617,17 +612,20 @@ trap_check(struct trapframe *frame) } static int -trap_pfault(frame, usermode) - struct trapframe *frame; - int usermode; +trap_pfault(struct trapframe *frame, int usermode) { - vm_offset_t va; + struct thread *td; + struct proc *p; vm_map_t map; - int rv = 0; + vm_offset_t va; + int rv; vm_prot_t ftype; - struct thread *td = curthread; - struct proc *p = td->td_proc; - vm_offset_t eva = frame->tf_addr; + vm_offset_t eva; + + td = curthread; + p = td->td_proc; + eva = frame->tf_addr; + rv = 0; if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) { /* @@ -854,16 +852,18 @@ dblfault_handler(struct trapframe *frame) } int -cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa) +cpu_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; register_t *argp; + struct syscall_args *sa; caddr_t params; int reg, 
regcnt, error; p = td->td_proc; frame = td->td_frame; + sa = &td->td_sa; reg = 0; regcnt = 6; @@ -914,7 +914,6 @@ cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa) void amd64_syscall(struct thread *td, int traced) { - struct syscall_args sa; int error; ksiginfo_t ksi; @@ -924,7 +923,7 @@ amd64_syscall(struct thread *td, int traced) /* NOT REACHED */ } #endif - error = syscallenter(td, &sa); + error = syscallenter(td); /* * Traced syscall. @@ -940,15 +939,16 @@ amd64_syscall(struct thread *td, int traced) KASSERT(PCB_USER_FPU(td->td_pcb), ("System call %s returning with kernel FPU ctx leaked", - syscallname(td->td_proc, sa.code))); + syscallname(td->td_proc, td->td_sa.code))); KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td), ("System call %s returning with mangled pcb_save", - syscallname(td->td_proc, sa.code))); + syscallname(td->td_proc, td->td_sa.code))); KASSERT(td->td_md.md_invl_gen.gen == 0, ("System call %s returning with leaked invl_gen %lu", - syscallname(td->td_proc, sa.code), td->td_md.md_invl_gen.gen)); + syscallname(td->td_proc, td->td_sa.code), + td->td_md.md_invl_gen.gen)); - syscallret(td, error, &sa); + syscallret(td, error); /* * If the user-supplied value of %rip is not a canonical @@ -958,6 +958,6 @@ amd64_syscall(struct thread *td, int traced) * not be safe. Instead, use the full return path which * catches the problem safely. 
*/ - if (td->td_frame->tf_rip >= VM_MAXUSER_ADDRESS) + if (__predict_false(td->td_frame->tf_rip >= VM_MAXUSER_ADDRESS)) set_pcb_flags(td->td_pcb, PCB_FULL_IRET); } diff --git a/sys/amd64/amd64/uma_machdep.c b/sys/amd64/amd64/uma_machdep.c index db566ae..220d095 100644 --- a/sys/amd64/amd64/uma_machdep.c +++ b/sys/amd64/amd64/uma_machdep.c @@ -46,20 +46,12 @@ uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait) vm_page_t m; vm_paddr_t pa; void *va; - int pflags; *flags = UMA_SLAB_PRIV; - pflags = malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED; - for (;;) { - m = vm_page_alloc(NULL, 0, pflags); - if (m == NULL) { - if (wait & M_NOWAIT) - return (NULL); - else - VM_WAIT; - } else - break; - } + m = vm_page_alloc(NULL, 0, + malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); + if (m == NULL) + return (NULL); pa = m->phys_addr; if ((wait & M_NODUMP) == 0) dump_add_page(pa); diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 8846eb8..d95bb56 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -176,6 +176,7 @@ cpu_fork(td1, p2, td2, flags) /* Ensure that td1's pcb is up to date. */ fpuexit(td1); + update_pcb_bases(td1->td_pcb); /* Point the pcb to the top of the stack */ pcb2 = get_pcb_td(td2); @@ -242,7 +243,7 @@ cpu_fork(td1, p2, td2, flags) pcb2->pcb_tssp = NULL; /* New segment registers. */ - set_pcb_flags(pcb2, PCB_FULL_IRET); + set_pcb_flags_raw(pcb2, PCB_FULL_IRET); /* Copy the LDT, if necessary. */ mdp1 = &td1->td_proc->p_md; @@ -439,13 +440,14 @@ cpu_copy_thread(struct thread *td, struct thread *td0) * Those not loaded individually below get their default * values here. 
*/ + update_pcb_bases(td0->td_pcb); bcopy(td0->td_pcb, pcb2, sizeof(*pcb2)); clear_pcb_flags(pcb2, PCB_FPUINITDONE | PCB_USERFPUINITDONE | PCB_KERNFPU); pcb2->pcb_save = get_pcb_user_save_pcb(pcb2); bcopy(get_pcb_user_save_td(td0), pcb2->pcb_save, cpu_max_ext_state_size); - set_pcb_flags(pcb2, PCB_FULL_IRET); + set_pcb_flags_raw(pcb2, PCB_FULL_IRET); /* * Create a new fresh stack for the new thread. diff --git a/sys/amd64/cloudabi32/cloudabi32_sysvec.c b/sys/amd64/cloudabi32/cloudabi32_sysvec.c index abede1a..d5dd58c 100644 --- a/sys/amd64/cloudabi32/cloudabi32_sysvec.c +++ b/sys/amd64/cloudabi32/cloudabi32_sysvec.c @@ -90,11 +90,15 @@ cloudabi32_proc_setregs(struct thread *td, struct image_params *imgp, } static int -cloudabi32_fetch_syscall_args(struct thread *td, struct syscall_args *sa) +cloudabi32_fetch_syscall_args(struct thread *td) { - struct trapframe *frame = td->td_frame; + struct trapframe *frame; + struct syscall_args *sa; int error; + frame = td->td_frame; + sa = &td->td_sa; + /* Obtain system call number. */ sa->code = frame->tf_rax; if (sa->code >= CLOUDABI32_SYS_MAXSYSCALL) diff --git a/sys/amd64/cloudabi64/cloudabi64_sysvec.c b/sys/amd64/cloudabi64/cloudabi64_sysvec.c index 84f0cb3..679888b 100644 --- a/sys/amd64/cloudabi64/cloudabi64_sysvec.c +++ b/sys/amd64/cloudabi64/cloudabi64_sysvec.c @@ -87,9 +87,13 @@ cloudabi64_proc_setregs(struct thread *td, struct image_params *imgp, } static int -cloudabi64_fetch_syscall_args(struct thread *td, struct syscall_args *sa) +cloudabi64_fetch_syscall_args(struct thread *td) { - struct trapframe *frame = td->td_frame; + struct trapframe *frame; + struct syscall_args *sa; + + frame = td->td_frame; + sa = &td->td_sa; /* Obtain system call number. 
*/ sa->code = frame->tf_rax; diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC index de7cbe6..b4eb43d 100644 --- a/sys/amd64/conf/GENERIC +++ b/sys/amd64/conf/GENERIC @@ -29,6 +29,7 @@ options PREEMPTION # Enable kernel thread preemption options INET # InterNETworking options INET6 # IPv6 communications protocols options IPSEC # IP (v4/v6) security +options IPSEC_SUPPORT # Allow kldload of ipsec and tcpmd5 options TCP_OFFLOAD # TCP offload options SCTP # Stream Control Transmission Protocol options FFS # Berkeley Fast Filesystem diff --git a/sys/amd64/conf/pfSense b/sys/amd64/conf/pfSense new file mode 100644 index 0000000..26c071e --- /dev/null +++ b/sys/amd64/conf/pfSense @@ -0,0 +1,182 @@ +include GENERIC + +nooptions KDB_TRACE +options DDB + +ident pfSense + +nooptions MAC # TrustedBSD MAC Framework +nooptions COMPAT_FREEBSD4 # Compatible with FreeBSD4 +nooptions COMPAT_FREEBSD5 # Compatible with FreeBSD5 +nooptions COMPAT_FREEBSD6 # Compatible with FreeBSD6 +nooptions COMPAT_FREEBSD7 # Compatible with FreeBSD7 + +options GEOM_MIRROR +options GEOM_UZIP +options GEOM_ELI +options GEOM_BDE + +options TMPFS +options UNIONFS +options NULLFS +options PPS_SYNC + +# Wireless +#nooptions IEEE80211_DEBUG # enable debug msgs +device wlan_rssadapt +device wlan_xauth +device wlan_acl +device iwifw +device ipwfw # Firmware for Intel PRO/Wireless 2100 IEEE 802.11 driver +device wpifw # Firmware for Intel 3945ABG Wireless LAN IEEE 802.11 driver +device iwnfw # Firmware for Intel Wireless WiFi Link 4965AGN IEEE 802.11n driver +device uath # Atheros USB IEEE 802.11a/b/g wireless network device +device ralfw # Firmware for Ralink Technology RT2500 wireless NICs. 
+device ural # Ralink Technology RT2500USB IEEE 802.11 driver +device urtw # Realtek RTL8187B/L USB IEEE 802.11b/g wireless network device +device rum # Ralink Technology USB IEEE 802.11a/b/g wireless network device +device mwlfw # Firmware for Marvell 88W8363 IEEE 802.11n wireless network driver +device zyd # ZyDAS ZD1211/ZD1211B USB IEEE 802.11b/g wireless network device +device upgt # Conexant/Intersil PrismGT SoftMAC USB IEEE 802.11b/g wireless +device udav # Davicom DM9601 USB Ethernet driver +device axe +device axge +device aue +device cue +device kue +device mos +device rsu +device rsufw +device run # Ralink RT2700U/RT2800U/RT3000U USB 802.11agn +device runfw +device rue +device rtwn +device rtwnfw +device siba_bwn # Broadcom BCM43xx IEEE 802.11b/g wireless network driver +device bwn # Broadcom BCM43xx IEEE 802.11b/g wireless network driver +device bwi # Broadcom BCM43xx IEEE 802.11b/g wireless network driver + +# Pseudo devices. +#device pty # Pseudo-ttys (telnet etc) + +# USB support +nooptions USB_DEBUG # enable debug msgs + +# 3G devices +device ufoma +device ucom +device uslcom +device uplcom +device umct +device uvisor +device uark +device uftdi +device uvscom +device umodem +device u3g +device cdce + +device uhid # "Human Interface Devices" + +# FireWire support +device firewire # FireWire bus code +device sbp # SCSI over FireWire (Requires scbus and da) + +# pfsense addons + +device tap +device gre +device if_bridge +device carp +device lagg +device vte + +# IP/IPFW +options IPFIREWALL_DEFAULT_TO_ACCEPT +options IPFIREWALL_VERBOSE +options IPSTEALTH + +# Netgraph +options NETGRAPH #netgraph(4) system +options NETGRAPH_VLAN +options NETGRAPH_L2TP +options NETGRAPH_BPF +options NETGRAPH_ETHER +options NETGRAPH_IFACE +options NETGRAPH_EIFACE +options NETGRAPH_PPP +options NETGRAPH_PPPOE +options NETGRAPH_PPTPGRE +options NETGRAPH_RFC1490 +options NETGRAPH_SOCKET +options NETGRAPH_TTY +options NETGRAPH_MPPC_ENCRYPTION +options NETGRAPH_UI +options 
NETGRAPH_VJC +options NETGRAPH_KSOCKET +options NETGRAPH_LMI +options NETGRAPH_ONE2MANY +options NETGRAPH_BRIDGE +options NETGRAPH_CISCO +options NETGRAPH_ECHO +options NETGRAPH_ASYNC +options NETGRAPH_FRAME_RELAY +options NETGRAPH_HOLE +options NETGRAPH_TEE +options NETGRAPH_TCPMSS +options NETGRAPH_PIPE +options NETGRAPH_CAR +options NETGRAPH_DEFLATE +options NETGRAPH_PRED1 + +# IPSEC +options TCP_SIGNATURE +device enc + +# ALTQ +options ALTQ +options ALTQ_CBQ +options ALTQ_RED +options ALTQ_RIO +options ALTQ_HFSC +options ALTQ_PRIQ +options ALTQ_FAIRQ +options ALTQ_NOPCC +options ALTQ_CODEL + +# Squid related settings +options MSGMNB=8192 # max # of bytes in a queue +options MSGMNI=40 # number of message queue identifiers +options MSGSEG=512 # number of message segments per queue +options MSGSSZ=32 # size of a message segment +options MSGTQL=2048 # max messages in system + +device pf +device pflog +device pfsync + +device rndtest # FIPS 140-2 entropy tester +device hifn # Hifn 7951, 7781, etc. 
+options HIFN_DEBUG # enable debugging support: hw.hifn.debug +options HIFN_RNDTEST # enable rndtest support +device ubsec # Broadcom 5501, 5601, 58xx +device safe # safe -- SafeNet crypto accelerator +device padlock + +device speaker + +options MROUTING + +# Additional cards +device mxge # mxge - Myricom Myri10GE 10 Gigabit Ethernet adapter driver +device cxgb # cxgb -- Chelsio T3 10 Gigabit Ethernet adapter driver +device cxgbe # cxgbe -- Chelsio T5 10 Gigabit Ethernet adapter driver +#device nve # nVidia nForce MCP on-board Ethernet Networking +device oce + +# Default serial speed +options CONSPEED=115200 + +# Enable gpioapu +#device gpioapu +#device gpiorcc diff --git a/sys/amd64/ia32/ia32_syscall.c b/sys/amd64/ia32/ia32_syscall.c index c2bf2fb..f743440 100644 --- a/sys/amd64/ia32/ia32_syscall.c +++ b/sys/amd64/ia32/ia32_syscall.c @@ -106,16 +106,18 @@ ia32_set_syscall_retval(struct thread *td, int error) } int -ia32_fetch_syscall_args(struct thread *td, struct syscall_args *sa) +ia32_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; + struct syscall_args *sa; caddr_t params; u_int32_t args[8], tmp; int error, i; p = td->td_proc; frame = td->td_frame; + sa = &td->td_sa; params = (caddr_t)frame->tf_rsp + sizeof(u_int32_t); sa->code = frame->tf_rax; @@ -176,7 +178,6 @@ void ia32_syscall(struct trapframe *frame) { struct thread *td; - struct syscall_args sa; register_t orig_tf_rflags; int error; ksiginfo_t ksi; @@ -185,7 +186,7 @@ ia32_syscall(struct trapframe *frame) td = curthread; td->td_frame = frame; - error = syscallenter(td, &sa); + error = syscallenter(td); /* * Traced syscall. 
@@ -199,7 +200,7 @@ ia32_syscall(struct trapframe *frame) trapsignal(td, &ksi); } - syscallret(td, error, &sa); + syscallret(td, error); } static void diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h index 5fa0d77..2107b2ba 100644 --- a/sys/amd64/include/cpufunc.h +++ b/sys/amd64/include/cpufunc.h @@ -651,6 +651,38 @@ load_gs(u_short sel) } #endif +static __inline uint64_t +rdfsbase(void) +{ + uint64_t x; + + __asm __volatile("rdfsbase %0" : "=r" (x)); + return (x); +} + +static __inline void +wrfsbase(uint64_t x) +{ + + __asm __volatile("wrfsbase %0" : : "r" (x)); +} + +static __inline uint64_t +rdgsbase(void) +{ + uint64_t x; + + __asm __volatile("rdgsbase %0" : "=r" (x)); + return (x); +} + +static __inline void +wrgsbase(uint64_t x) +{ + + __asm __volatile("wrgsbase %0" : : "r" (x)); +} + static __inline void bare_lgdt(struct region_descriptor *addr) { diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h index b81f497..f4d6e60 100644 --- a/sys/amd64/include/md_var.h +++ b/sys/amd64/include/md_var.h @@ -34,7 +34,8 @@ #include <x86/x86_var.h> -extern uint64_t *vm_page_dump; +extern uint64_t *vm_page_dump; +extern int hw_lower_amd64_sharedpage; extern int hw_ibrs_disable; /* @@ -44,9 +45,11 @@ extern int hw_ibrs_disable; extern char kernphys[]; struct savefpu; +struct sysentvec; void amd64_conf_fast_syscall(void); void amd64_db_resume_dbreg(void); +void amd64_lower_shared_page(struct sysentvec *); void amd64_syscall(struct thread *td, int traced); void doreti_iret(void) __asm(__STRING(doreti_iret)); void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault)); diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h index 2b7bb6e..09aea36 100644 --- a/sys/amd64/include/pcb.h +++ b/sys/amd64/include/pcb.h @@ -119,40 +119,15 @@ struct susppcb { #ifdef _KERNEL struct trapframe; -/* - * The pcb_flags is only modified by current thread, or by other threads - * when current thread is stopped. 
However, current thread may change it - * from the interrupt context in cpu_switch(), or in the trap handler. - * When we read-modify-write pcb_flags from C sources, compiler may generate - * code that is not atomic regarding the interrupt handler. If a trap or - * interrupt happens and any flag is modified from the handler, it can be - * clobbered with the cached value later. Therefore, we implement setting - * and clearing flags with single-instruction functions, which do not race - * with possible modification of the flags from the trap or interrupt context, - * because traps and interrupts are executed only on instruction boundary. - */ -static __inline void -set_pcb_flags(struct pcb *pcb, const u_int flags) -{ - - __asm __volatile("orl %1,%0" - : "=m" (pcb->pcb_flags) : "ir" (flags), "m" (pcb->pcb_flags) - : "cc"); -} - -static __inline void -clear_pcb_flags(struct pcb *pcb, const u_int flags) -{ - - __asm __volatile("andl %1,%0" - : "=m" (pcb->pcb_flags) : "ir" (~flags), "m" (pcb->pcb_flags) - : "cc"); -} - +void clear_pcb_flags(struct pcb *pcb, const u_int flags); void makectx(struct trapframe *, struct pcb *); +void set_pcb_flags(struct pcb *pcb, const u_int flags); +void set_pcb_flags_raw(struct pcb *pcb, const u_int flags); int savectx(struct pcb *) __returns_twice; void resumectx(struct pcb *); +/* Ensure that pcb_gsbase and pcb_fsbase are up to date */ +#define update_pcb_bases(pcb) set_pcb_flags((pcb), PCB_FULL_IRET) #endif #endif /* _AMD64_PCB_H_ */ diff --git a/sys/amd64/include/proc.h b/sys/amd64/include/proc.h index f4b59aa..4c2b244 100644 --- a/sys/amd64/include/proc.h +++ b/sys/amd64/include/proc.h @@ -70,6 +70,13 @@ struct mdproc { #define KINFO_PROC_SIZE 1088 #define KINFO_PROC32_SIZE 768 +struct syscall_args { + u_int code; + struct sysent *callp; + register_t args[8]; + int narg; +}; + #ifdef _KERNEL /* Get the current kernel thread stack usage. 
*/ @@ -92,13 +99,6 @@ int amd64_set_ldt_data(struct thread *td, int start, int num, extern struct mtx dt_lock; extern int max_ldt_segment; - -struct syscall_args { - u_int code; - struct sysent *callp; - register_t args[8]; - int narg; -}; #endif /* _KERNEL */ #endif /* !_MACHINE_PROC_H_ */ diff --git a/sys/amd64/linux/linux_sysvec.c b/sys/amd64/linux/linux_sysvec.c index 6e12d41..942819b 100644 --- a/sys/amd64/linux/linux_sysvec.c +++ b/sys/amd64/linux/linux_sysvec.c @@ -126,7 +126,7 @@ static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel); static void linux_vdso_install(void *param); static void linux_vdso_deinstall(void *param); static void linux_set_syscall_retval(struct thread *td, int error); -static int linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa); +static int linux_fetch_syscall_args(struct thread *td); static void linux_exec_setregs(struct thread *td, struct image_params *imgp, u_long stack); static int linux_vsyscall(struct thread *td); @@ -217,13 +217,15 @@ translate_traps(int signal, int trap_code) } static int -linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa) +linux_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; + struct syscall_args *sa; p = td->td_proc; frame = td->td_frame; + sa = &td->td_sa; sa->args[0] = frame->tf_rdi; sa->args[1] = frame->tf_rsi; @@ -831,6 +833,8 @@ static void linux_vdso_install(void *param) { + amd64_lower_shared_page(&elf_linux_sysvec); + linux_szsigcode = (&_binary_linux_locore_o_end - &_binary_linux_locore_o_start); diff --git a/sys/amd64/linux32/linux32_sysvec.c b/sys/amd64/linux32/linux32_sysvec.c index c264f3a..ea849ba 100644 --- a/sys/amd64/linux32/linux32_sysvec.c +++ b/sys/amd64/linux32/linux32_sysvec.c @@ -725,13 +725,15 @@ linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) } static int -linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa) +linux32_fetch_syscall_args(struct thread 
*td) { struct proc *p; struct trapframe *frame; + struct syscall_args *sa; p = td->td_proc; frame = td->td_frame; + sa = &td->td_sa; sa->args[0] = frame->tf_rbx; sa->args[1] = frame->tf_rcx; diff --git a/sys/amd64/vmm/intel/vmx_msr.c b/sys/amd64/vmm/intel/vmx_msr.c index 91b2c01..a48cb76 100644 --- a/sys/amd64/vmm/intel/vmx_msr.c +++ b/sys/amd64/vmm/intel/vmx_msr.c @@ -31,10 +31,12 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> +#include <sys/proc.h> #include <machine/clock.h> #include <machine/cpufunc.h> #include <machine/md_var.h> +#include <machine/pcb.h> #include <machine/specialreg.h> #include <machine/vmm.h> @@ -356,7 +358,8 @@ vmx_msr_guest_enter(struct vmx *vmx, int vcpuid) { uint64_t *guest_msrs = vmx->guest_msrs[vcpuid]; - /* Save host MSRs (if any) and restore guest MSRs */ + /* Save host MSRs (in particular, KGSBASE) and restore guest MSRs */ + update_pcb_bases(curpcb); wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]); wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]); wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]); |