summary refs log tree commit diff stats
path: root/sys/amd64
diff options
context:
space:
mode:
Diffstat (limited to 'sys/amd64')
-rw-r--r-- sys/amd64/amd64/cpu_switch.S 27
-rw-r--r-- sys/amd64/amd64/elf_machdep.c 19
-rw-r--r-- sys/amd64/amd64/exception.S 36
-rw-r--r-- sys/amd64/amd64/fpu.c 1
-rw-r--r-- sys/amd64/amd64/initcpu.c 27
-rw-r--r-- sys/amd64/amd64/machdep.c 81
-rw-r--r-- sys/amd64/amd64/pmap.c 5
-rw-r--r-- sys/amd64/amd64/ptrace_machdep.c 22
-rw-r--r-- sys/amd64/amd64/support.S 29
-rw-r--r-- sys/amd64/amd64/sys_machdep.c 16
-rw-r--r-- sys/amd64/amd64/trap.c 234
-rw-r--r-- sys/amd64/amd64/uma_machdep.c 16
-rw-r--r-- sys/amd64/amd64/vm_machdep.c 6
-rw-r--r-- sys/amd64/cloudabi32/cloudabi32_sysvec.c 8
-rw-r--r-- sys/amd64/cloudabi64/cloudabi64_sysvec.c 8
-rw-r--r-- sys/amd64/conf/GENERIC 1
-rw-r--r-- sys/amd64/conf/pfSense 182
-rw-r--r-- sys/amd64/ia32/ia32_syscall.c 9
-rw-r--r-- sys/amd64/include/cpufunc.h 32
-rw-r--r-- sys/amd64/include/md_var.h 5
-rw-r--r-- sys/amd64/include/pcb.h 35
-rw-r--r-- sys/amd64/include/proc.h 14
-rw-r--r-- sys/amd64/linux/linux_sysvec.c 8
-rw-r--r-- sys/amd64/linux32/linux32_sysvec.c 4
-rw-r--r-- sys/amd64/vmm/intel/vmx_msr.c 5
25 files changed, 602 insertions, 228 deletions
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
index ff47afb..952539f 100644
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -87,7 +87,6 @@ END(cpu_throw)
ENTRY(cpu_switch)
/* Switch to new thread. First, save context. */
movq TD_PCB(%rdi),%r8
- orl $PCB_FULL_IRET,PCB_FLAGS(%r8)
movq (%rsp),%rax /* Hardware registers */
movq %r15,PCB_R15(%r8)
@@ -99,6 +98,30 @@ ENTRY(cpu_switch)
movq %rbx,PCB_RBX(%r8)
movq %rax,PCB_RIP(%r8)
+ testl $PCB_FULL_IRET,PCB_FLAGS(%r8)
+ jnz 2f
+ orl $PCB_FULL_IRET,PCB_FLAGS(%r8)
+ testl $TDP_KTHREAD,TD_PFLAGS(%rdi)
+ jnz 2f
+ testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
+ jz 2f
+ movl %fs,%eax
+ cmpl $KUF32SEL,%eax
+ jne 1f
+ rdfsbase %rax
+ movq %rax,PCB_FSBASE(%r8)
+1: movl %gs,%eax
+ cmpl $KUG32SEL,%eax
+ jne 2f
+ movq %rdx,%r12
+ movl $MSR_KGSBASE,%ecx /* Read user gs base */
+ rdmsr
+ shlq $32,%rdx
+ orq %rdx,%rax
+ movq %rax,PCB_GSBASE(%r8)
+ movq %r12,%rdx
+
+2:
testl $PCB_DBREGS,PCB_FLAGS(%r8)
jnz store_dr /* static predict not taken */
done_store_dr:
@@ -149,7 +172,7 @@ sw1:
* to load up the rest of the next context.
*/
- /* Skip loading user fsbase/gsbase for kthreads */
+ /* Skip loading LDT and user fsbase/gsbase for kthreads */
testl $TDP_KTHREAD,TD_PFLAGS(%r12)
jnz do_kthread
diff --git a/sys/amd64/amd64/elf_machdep.c b/sys/amd64/amd64/elf_machdep.c
index ca07adc..4e9476a 100644
--- a/sys/amd64/amd64/elf_machdep.c
+++ b/sys/amd64/amd64/elf_machdep.c
@@ -84,6 +84,25 @@ struct sysentvec elf64_freebsd_sysvec = {
};
INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec);
+void
+amd64_lower_shared_page(struct sysentvec *sv)
+{
+ if (hw_lower_amd64_sharedpage != 0) {
+ sv->sv_maxuser -= PAGE_SIZE;
+ sv->sv_shared_page_base -= PAGE_SIZE;
+ sv->sv_usrstack -= PAGE_SIZE;
+ sv->sv_psstrings -= PAGE_SIZE;
+ }
+}
+
+/*
+ * Do this fixup before INIT_SYSENTVEC (SI_ORDER_ANY) because the latter
+ * uses the value of sv_shared_page_base.
+ */
+SYSINIT(elf64_sysvec_fixup, SI_SUB_EXEC, SI_ORDER_FIRST,
+ (sysinit_cfunc_t) amd64_lower_shared_page,
+ &elf64_freebsd_sysvec);
+
static Elf64_Brandinfo freebsd_brand_info = {
.brand = ELFOSABI_FREEBSD,
.machine = EM_X86_64,
diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S
index 524e729..b89c2eb 100644
--- a/sys/amd64/amd64/exception.S
+++ b/sys/amd64/amd64/exception.S
@@ -388,8 +388,24 @@ prot_addrf:
je 5f /* kernel but with user gsbase!! */
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
jz 6f /* already running with kernel GS.base */
- swapgs
+ testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
+ jz 2f
+ cmpw $KUF32SEL,TF_FS(%rsp)
+ jne 1f
+ rdfsbase %rax
+1: cmpw $KUG32SEL,TF_GS(%rsp)
+ jne 2f
+ rdgsbase %rdx
+2: swapgs
movq PCPU(CURPCB),%rdi
+ testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
+ jz 4f
+ cmpw $KUF32SEL,TF_FS(%rsp)
+ jne 3f
+ movq %rax,PCB_FSBASE(%rdi)
+3: cmpw $KUG32SEL,TF_GS(%rsp)
+ jne 4f
+ movq %rdx,PCB_GSBASE(%rdi)
4: call handle_ibrs_entry
orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) /* always full iret from GPF */
movw %es,TF_ES(%rsp)
@@ -409,8 +425,8 @@ prot_addrf:
* pointer. We have to juggle a few things around to find our stack etc.
* swapgs gives us access to our PCPU space only.
*
- * We do not support invoking this from a custom %cs or %ss (e.g. using
- * entries from an LDT).
+ * We do not support invoking this with custom segment registers,
+ * esp. %cs, %ss, %fs, %gs, e.g. using entries from an LDT.
*/
SUPERALIGN_TEXT
IDTVEC(fast_syscall_pti)
@@ -597,6 +613,19 @@ nmi_fromuserspace:
testq %rdi,%rdi
jz 3f
orl $PCB_FULL_IRET,PCB_FLAGS(%rdi)
+ testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
+ jz 3f
+ cmpw $KUF32SEL,TF_FS(%rsp)
+ jne 2f
+ rdfsbase %rax
+ movq %rax,PCB_FSBASE(%rdi)
+2: cmpw $KUG32SEL,TF_GS(%rsp)
+ jne 3f
+ movl $MSR_KGSBASE,%ecx
+ rdmsr
+ shlq $32,%rdx
+ orq %rdx,%rax
+ movq %rax,PCB_GSBASE(%rdi)
3:
/* Note: this label is also used by ddb and gdb: */
nmi_calltrap:
@@ -909,6 +938,7 @@ doreti_exit:
jz ld_regs
testl $PCB_FULL_IRET,PCB_FLAGS(%r8)
jz ld_regs
+ andl $~PCB_FULL_IRET,PCB_FLAGS(%r8)
testl $TF_HASSEGS,TF_FLAGS(%rsp)
je set_segs
diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c
index 4c70e88..540966b 100644
--- a/sys/amd64/amd64/fpu.c
+++ b/sys/amd64/amd64/fpu.c
@@ -806,6 +806,7 @@ fpusetregs(struct thread *td, struct savefpu *addr, char *xfpustate,
struct pcb *pcb;
int error;
+ addr->sv_env.en_mxcsr &= cpu_mxcsr_mask;
pcb = td->td_pcb;
critical_enter();
if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) {
diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c
index 2818111..de3e461 100644
--- a/sys/amd64/amd64/initcpu.c
+++ b/sys/amd64/amd64/initcpu.c
@@ -48,6 +48,11 @@ __FBSDID("$FreeBSD$");
static int hw_instruction_sse;
SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD,
&hw_instruction_sse, 0, "SIMD/MMX2 instructions available in CPU");
+static int lower_sharedpage_init;
+int hw_lower_amd64_sharedpage;
+SYSCTL_INT(_hw, OID_AUTO, lower_amd64_sharedpage, CTLFLAG_RDTUN,
+ &hw_lower_amd64_sharedpage, 0,
+ "Lower sharedpage to work around Ryzen issue with executing code near the top of user memory");
/*
* -1: automatic (default)
* 0: keep enable CLFLUSH
@@ -122,6 +127,28 @@ init_amd(void)
wrmsr(0xc0011020, msr);
}
}
+
+ /*
+ * Work around a problem on Ryzen that is triggered by executing
+ * code near the top of user memory, in our case the signal
+ * trampoline code in the shared page on amd64.
+ *
+ * This function is executed once for the BSP before tunables take
+ * effect so the value determined here can be overridden by the
+ * tunable. This function is then executed again for each AP and
+ * also on resume. Set a flag the first time so that the value set
+ * by the tunable is not overwritten.
+ *
+ * The stepping and/or microcode versions should be checked after
+ * this issue is fixed by AMD so that we don't use this mode if not
+ * needed.
+ */
+ if (lower_sharedpage_init == 0) {
+ lower_sharedpage_init = 1;
+ if (CPUID_TO_FAMILY(cpu_id) == 0x17) {
+ hw_lower_amd64_sharedpage = 1;
+ }
+ }
}
/*
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index dd5bb06..51b8433 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -382,6 +382,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len);
fpstate_drop(td);
+ update_pcb_bases(pcb);
sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase;
sf.sf_uc.uc_mcontext.mc_gsbase = pcb->pcb_gsbase;
bzero(sf.sf_uc.uc_mcontext.mc_spare,
@@ -452,7 +453,6 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
regs->tf_fs = _ufssel;
regs->tf_gs = _ugssel;
regs->tf_flags = TF_HASSEGS;
- set_pcb_flags(pcb, PCB_FULL_IRET);
PROC_LOCK(p);
mtx_lock(&psp->ps_mtx);
}
@@ -558,6 +558,7 @@ sys_sigreturn(td, uap)
return (ret);
}
bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs));
+ update_pcb_bases(pcb);
pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase;
pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase;
@@ -569,7 +570,6 @@ sys_sigreturn(td, uap)
#endif
kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0);
- set_pcb_flags(pcb, PCB_FULL_IRET);
return (EJUSTRETURN);
}
@@ -597,11 +597,11 @@ exec_setregs(struct thread *td, struct image_params *imgp, u_long stack)
else
mtx_unlock(&dt_lock);
+ update_pcb_bases(pcb);
pcb->pcb_fsbase = 0;
pcb->pcb_gsbase = 0;
clear_pcb_flags(pcb, PCB_32BIT);
pcb->pcb_initial_fpucw = __INITIAL_FPUCW__;
- set_pcb_flags(pcb, PCB_FULL_IRET);
bzero((char *)regs, sizeof(struct trapframe));
regs->tf_rip = imgp->entry_addr;
@@ -1572,6 +1572,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
kmdp = init_ops.parse_preload_data(modulep);
identify_cpu1();
+ identify_hypervisor();
/* Init basic tunables, hz etc */
init_param1();
@@ -2193,6 +2194,7 @@ get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
mcp->mc_flags = tp->tf_flags;
mcp->mc_len = sizeof(*mcp);
get_fpcontext(td, mcp, NULL, 0);
+ update_pcb_bases(pcb);
mcp->mc_fsbase = pcb->pcb_fsbase;
mcp->mc_gsbase = pcb->pcb_gsbase;
mcp->mc_xfpustate = 0;
@@ -2263,11 +2265,11 @@ set_mcontext(struct thread *td, mcontext_t *mcp)
tp->tf_fs = mcp->mc_fs;
tp->tf_gs = mcp->mc_gs;
}
+ set_pcb_flags(pcb, PCB_FULL_IRET);
if (mcp->mc_flags & _MC_HASBASES) {
pcb->pcb_fsbase = mcp->mc_fsbase;
pcb->pcb_gsbase = mcp->mc_gsbase;
}
- set_pcb_flags(pcb, PCB_FULL_IRET);
return (0);
}
@@ -2298,7 +2300,6 @@ static int
set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate,
size_t xfpustate_len)
{
- struct savefpu *fpstate;
int error;
if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
@@ -2311,9 +2312,8 @@ set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate,
error = 0;
} else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
- fpstate = (struct savefpu *)&mcp->mc_fpstate;
- fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask;
- error = fpusetregs(td, fpstate, xfpustate, xfpustate_len);
+ error = fpusetregs(td, (struct savefpu *)&mcp->mc_fpstate,
+ xfpustate, xfpustate_len);
} else
return (EINVAL);
return (error);
@@ -2538,6 +2538,71 @@ user_dbreg_trap(void)
return 0;
}
+/*
+ * pcb_flags is only modified by the current thread, or by other threads
+ * when the current thread is stopped. However, the current thread may change
+ * it from interrupt context in cpu_switch(), or in the trap handler. When we
+ * read-modify-write pcb_flags from C sources, the compiler may generate code
+ * that is not atomic with respect to the interrupt handler. If a trap or
+ * interrupt happens and any flag is modified from the handler, it can be
+ * clobbered with the cached value later. Therefore, we implement setting
+ * and clearing flags with single-instruction functions, which do not race
+ * with possible modification of the flags from the trap or interrupt context,
+ * because traps and interrupts are taken only on instruction boundaries.
+ */
+void
+set_pcb_flags_raw(struct pcb *pcb, const u_int flags)
+{
+
+ __asm __volatile("orl %1,%0"
+ : "=m" (pcb->pcb_flags) : "ir" (flags), "m" (pcb->pcb_flags)
+ : "cc", "memory");
+
+}
+
+/*
+ * The support for RDFSBASE, WRFSBASE and similar instructions for %gs
+ * base requires that the kernel save MSR_FSBASE and MSR_{K,}GSBASE into
+ * the pcb if user space modified the bases. We must save them on a
+ * context switch or if the return to usermode happens through doreti.
+ *
+ * Tracking of both events is performed by the pcb flag PCB_FULL_IRET,
+ * which has the consequence that the base MSRs must be saved each time
+ * the PCB_FULL_IRET flag is set. We disable interrupts to sync with
+ * context switches.
+ */
+void
+set_pcb_flags(struct pcb *pcb, const u_int flags)
+{
+ register_t r;
+
+ if (curpcb == pcb &&
+ (flags & PCB_FULL_IRET) != 0 &&
+ (pcb->pcb_flags & PCB_FULL_IRET) == 0 &&
+ (cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0) {
+ r = intr_disable();
+ if ((pcb->pcb_flags & PCB_FULL_IRET) == 0) {
+ if (rfs() == _ufssel)
+ pcb->pcb_fsbase = rdfsbase();
+ if (rgs() == _ugssel)
+ pcb->pcb_gsbase = rdmsr(MSR_KGSBASE);
+ }
+ set_pcb_flags_raw(pcb, flags);
+ intr_restore(r);
+ } else {
+ set_pcb_flags_raw(pcb, flags);
+ }
+}
+
+void
+clear_pcb_flags(struct pcb *pcb, const u_int flags)
+{
+
+ __asm __volatile("andl %1,%0"
+ : "=m" (pcb->pcb_flags) : "ir" (~flags), "m" (pcb->pcb_flags)
+ : "cc", "memory");
+}
+
#ifdef KDB
/*
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 2989eb40..64abe9b 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -2566,9 +2566,8 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags)
/*
* allocate the page directory page
*/
- while ((pml4pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
- VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
- VM_WAIT;
+ pml4pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
+ VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_WAITOK);
pml4phys = VM_PAGE_TO_PHYS(pml4pg);
pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(pml4phys);
diff --git a/sys/amd64/amd64/ptrace_machdep.c b/sys/amd64/amd64/ptrace_machdep.c
index dbb3f13..169e15c 100644
--- a/sys/amd64/amd64/ptrace_machdep.c
+++ b/sys/amd64/amd64/ptrace_machdep.c
@@ -117,15 +117,17 @@ cpu_ptrace_xstate(struct thread *td, int req, void *addr, int data)
static void
cpu_ptrace_setbase(struct thread *td, int req, register_t r)
{
+ struct pcb *pcb;
+ pcb = td->td_pcb;
+ set_pcb_flags(pcb, PCB_FULL_IRET);
if (req == PT_SETFSBASE) {
- td->td_pcb->pcb_fsbase = r;
+ pcb->pcb_fsbase = r;
td->td_frame->tf_fs = _ufssel;
} else {
- td->td_pcb->pcb_gsbase = r;
+ pcb->pcb_gsbase = r;
td->td_frame->tf_gs = _ugssel;
}
- set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
}
#ifdef COMPAT_FREEBSD32
@@ -136,6 +138,7 @@ static int
cpu32_ptrace(struct thread *td, int req, void *addr, int data)
{
struct savefpu *fpstate;
+ struct pcb *pcb;
uint32_t r;
int error;
@@ -167,8 +170,10 @@ cpu32_ptrace(struct thread *td, int req, void *addr, int data)
error = EINVAL;
break;
}
- r = req == PT_GETFSBASE ? td->td_pcb->pcb_fsbase :
- td->td_pcb->pcb_gsbase;
+ pcb = td->td_pcb;
+ if (td == curthread)
+ update_pcb_bases(pcb);
+ r = req == PT_GETFSBASE ? pcb->pcb_fsbase : pcb->pcb_gsbase;
error = copyout(&r, addr, sizeof(r));
break;
@@ -197,6 +202,7 @@ int
cpu_ptrace(struct thread *td, int req, void *addr, int data)
{
register_t *r, rv;
+ struct pcb *pcb;
int error;
#ifdef COMPAT_FREEBSD32
@@ -221,8 +227,10 @@ cpu_ptrace(struct thread *td, int req, void *addr, int data)
case PT_GETFSBASE:
case PT_GETGSBASE:
- r = req == PT_GETFSBASE ? &td->td_pcb->pcb_fsbase :
- &td->td_pcb->pcb_gsbase;
+ pcb = td->td_pcb;
+ if (td == curthread)
+ update_pcb_bases(pcb);
+ r = req == PT_GETFSBASE ? &pcb->pcb_fsbase : &pcb->pcb_gsbase;
error = copyout(r, addr, sizeof(*r));
break;
diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S
index f6be94e..aed23d6 100644
--- a/sys/amd64/amd64/support.S
+++ b/sys/amd64/amd64/support.S
@@ -51,7 +51,6 @@ ENTRY(bzero)
movq %rsi,%rcx
xorl %eax,%eax
shrq $3,%rcx
- cld
rep
stosq
movq %rsi,%rcx
@@ -77,7 +76,6 @@ ENTRY(bcmp)
PUSH_FRAME_POINTER
movq %rdx,%rcx
shrq $3,%rcx
- cld /* compare forwards */
repe
cmpsq
jne 1f
@@ -109,7 +107,6 @@ ENTRY(bcopy)
jb 1f
shrq $3,%rcx /* copy by 64-bit words */
- cld /* nope, copy forwards */
rep
movsq
movq %rdx,%rcx
@@ -148,7 +145,6 @@ ENTRY(memcpy)
movq %rdi,%rax
movq %rdx,%rcx
shrq $3,%rcx /* copy by 64-bit words */
- cld /* copy forwards */
rep
movsq
movq %rdx,%rcx
@@ -195,7 +191,6 @@ ENTRY(fillw)
movq %rdi,%rax
movq %rsi,%rdi
movq %rdx,%rcx
- cld
rep
stosw
POP_FRAME_POINTER
@@ -215,7 +210,7 @@ END(fillw)
*/
/*
- * copyout(from_kernel, to_user, len) - MP SAFE
+ * copyout(from_kernel, to_user, len)
* %rdi, %rsi, %rdx
*/
ENTRY(copyout)
@@ -226,12 +221,11 @@ ENTRY(copyout)
jz done_copyout
/*
- * Check explicitly for non-user addresses. If 486 write protection
- * is being used, this check is essential because we are in kernel
- * mode so the h/w does not provide any protection against writing
- * kernel addresses.
+ * Check explicitly for non-user addresses. This check is essential
+ * because it prevents usermode from writing into the kernel. We do
+ * not verify anywhere else that the user did not specify a rogue
+ * address.
*/
-
/*
* First, prevent address wrapping.
*/
@@ -253,7 +247,6 @@ ENTRY(copyout)
movq %rdx,%rcx
shrq $3,%rcx
- cld
rep
movsq
movb %dl,%cl
@@ -278,7 +271,7 @@ copyout_fault:
END(copyout)
/*
- * copyin(from_user, to_kernel, len) - MP SAFE
+ * copyin(from_user, to_kernel, len)
* %rdi, %rsi, %rdx
*/
ENTRY(copyin)
@@ -302,7 +295,6 @@ ENTRY(copyin)
movq %rdx,%rcx
movb %cl,%al
shrq $3,%rcx /* copy longword-wise */
- cld
rep
movsq
movb %al,%cl
@@ -495,7 +487,7 @@ fusufault:
/*
* Store a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit byte to
- * user memory. All these functions are MPSAFE.
+ * user memory.
* addr = %rdi, value = %rsi
*/
ALTENTRY(suword64)
@@ -570,7 +562,7 @@ ENTRY(subyte)
END(subyte)
/*
- * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE
+ * copyinstr(from, to, maxlen, int *lencopied)
* %rdi, %rsi, %rdx, %rcx
*
* copy a string from from to to, stop when a 0 character is reached.
@@ -599,7 +591,6 @@ ENTRY(copyinstr)
movq %rax,%r8
1:
incq %rdx
- cld
2:
decq %rdx
@@ -641,7 +632,7 @@ cpystrflt_x:
END(copyinstr)
/*
- * copystr(from, to, maxlen, int *lencopied) - MP SAFE
+ * copystr(from, to, maxlen, int *lencopied)
* %rdi, %rsi, %rdx, %rcx
*/
ENTRY(copystr)
@@ -650,7 +641,6 @@ ENTRY(copystr)
xchgq %rdi,%rsi
incq %rdx
- cld
1:
decq %rdx
jz 4f
@@ -681,7 +671,6 @@ END(copystr)
/*
* Handling of special amd64 registers and descriptor tables etc
- * %rdi
*/
/* void lgdt(struct region_descriptor *rdp); */
ENTRY(lgdt)
diff --git a/sys/amd64/amd64/sys_machdep.c b/sys/amd64/amd64/sys_machdep.c
index 8867aed..42cae4a 100644
--- a/sys/amd64/amd64/sys_machdep.c
+++ b/sys/amd64/amd64/sys_machdep.c
@@ -256,39 +256,45 @@ sysarch(td, uap)
error = amd64_set_ioperm(td, &iargs);
break;
case I386_GET_FSBASE:
+ update_pcb_bases(pcb);
i386base = pcb->pcb_fsbase;
error = copyout(&i386base, uap->parms, sizeof(i386base));
break;
case I386_SET_FSBASE:
error = copyin(uap->parms, &i386base, sizeof(i386base));
if (!error) {
+ set_pcb_flags(pcb, PCB_FULL_IRET);
pcb->pcb_fsbase = i386base;
td->td_frame->tf_fs = _ufssel;
update_gdt_fsbase(td, i386base);
}
break;
case I386_GET_GSBASE:
+ update_pcb_bases(pcb);
i386base = pcb->pcb_gsbase;
error = copyout(&i386base, uap->parms, sizeof(i386base));
break;
case I386_SET_GSBASE:
error = copyin(uap->parms, &i386base, sizeof(i386base));
if (!error) {
+ set_pcb_flags(pcb, PCB_FULL_IRET);
pcb->pcb_gsbase = i386base;
td->td_frame->tf_gs = _ugssel;
update_gdt_gsbase(td, i386base);
}
break;
case AMD64_GET_FSBASE:
- error = copyout(&pcb->pcb_fsbase, uap->parms, sizeof(pcb->pcb_fsbase));
+ update_pcb_bases(pcb);
+ error = copyout(&pcb->pcb_fsbase, uap->parms,
+ sizeof(pcb->pcb_fsbase));
break;
case AMD64_SET_FSBASE:
error = copyin(uap->parms, &a64base, sizeof(a64base));
if (!error) {
if (a64base < VM_MAXUSER_ADDRESS) {
- pcb->pcb_fsbase = a64base;
set_pcb_flags(pcb, PCB_FULL_IRET);
+ pcb->pcb_fsbase = a64base;
td->td_frame->tf_fs = _ufssel;
} else
error = EINVAL;
@@ -296,15 +302,17 @@ sysarch(td, uap)
break;
case AMD64_GET_GSBASE:
- error = copyout(&pcb->pcb_gsbase, uap->parms, sizeof(pcb->pcb_gsbase));
+ update_pcb_bases(pcb);
+ error = copyout(&pcb->pcb_gsbase, uap->parms,
+ sizeof(pcb->pcb_gsbase));
break;
case AMD64_SET_GSBASE:
error = copyin(uap->parms, &a64base, sizeof(a64base));
if (!error) {
if (a64base < VM_MAXUSER_ADDRESS) {
- pcb->pcb_gsbase = a64base;
set_pcb_flags(pcb, PCB_FULL_IRET);
+ pcb->pcb_gsbase = a64base;
td->td_frame->tf_gs = _ugssel;
} else
error = EINVAL;
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index fccd297..a553fc5 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -99,9 +99,8 @@ PMC_SOFT_DEFINE( , , page_fault, write);
#include <sys/dtrace_bsd.h>
#endif
-extern void __noinline trap(struct trapframe *frame);
-extern void trap_check(struct trapframe *frame);
-extern void syscall(struct trapframe *frame);
+void __noinline trap(struct trapframe *frame);
+void trap_check(struct trapframe *frame);
void dblfault_handler(struct trapframe *frame);
static int trap_pfault(struct trapframe *, int);
@@ -166,37 +165,41 @@ trap(struct trapframe *frame)
#ifdef KDTRACE_HOOKS
struct reg regs;
#endif
- struct thread *td = curthread;
- struct proc *p = td->td_proc;
+ ksiginfo_t ksi;
+ struct thread *td;
+ struct proc *p;
+ register_t addr;
#ifdef KDB
register_t dr6;
#endif
- int i = 0, ucode = 0;
+ int signo, ucode;
u_int type;
- register_t addr = 0;
- ksiginfo_t ksi;
+
+ td = curthread;
+ p = td->td_proc;
+ signo = 0;
+ ucode = 0;
+ addr = 0;
PCPU_INC(cnt.v_trap);
type = frame->tf_trapno;
#ifdef SMP
/* Handler for NMI IPIs used for stopping CPUs. */
- if (type == T_NMI) {
- if (ipi_nmi_handler() == 0)
- goto out;
- }
-#endif /* SMP */
+ if (type == T_NMI && ipi_nmi_handler() == 0)
+ return;
+#endif
#ifdef KDB
if (kdb_active) {
kdb_reenter();
- goto out;
+ return;
}
#endif
if (type == T_RESERVED) {
trap_fatal(frame, 0);
- goto out;
+ return;
}
if (type == T_NMI) {
@@ -209,12 +212,12 @@ trap(struct trapframe *frame)
*/
if (pmc_intr != NULL &&
(*pmc_intr)(PCPU_GET(cpuid), frame) != 0)
- goto out;
+ return;
#endif
#ifdef STACK
if (stack_nmi_handler(frame) != 0)
- goto out;
+ return;
#endif
}
@@ -259,7 +262,7 @@ trap(struct trapframe *frame)
switch (type) {
case T_PRIVINFLT: /* privileged instruction fault */
- i = SIGILL;
+ signo = SIGILL;
ucode = ILL_PRVOPC;
break;
@@ -271,41 +274,41 @@ trap(struct trapframe *frame)
fill_frame_regs(frame, &regs);
if (dtrace_pid_probe_ptr != NULL &&
dtrace_pid_probe_ptr(&regs) == 0)
- goto out;
+ return;
}
#endif
frame->tf_rflags &= ~PSL_T;
- i = SIGTRAP;
+ signo = SIGTRAP;
ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
break;
case T_ARITHTRAP: /* arithmetic trap */
ucode = fputrap_x87();
if (ucode == -1)
- goto userout;
- i = SIGFPE;
+ return;
+ signo = SIGFPE;
break;
case T_PROTFLT: /* general protection fault */
- i = SIGBUS;
+ signo = SIGBUS;
ucode = BUS_OBJERR;
break;
case T_STKFLT: /* stack fault */
case T_SEGNPFLT: /* segment not present fault */
- i = SIGBUS;
+ signo = SIGBUS;
ucode = BUS_ADRERR;
break;
case T_TSSFLT: /* invalid TSS fault */
- i = SIGBUS;
+ signo = SIGBUS;
ucode = BUS_OBJERR;
break;
case T_ALIGNFLT:
- i = SIGBUS;
+ signo = SIGBUS;
ucode = BUS_ADRALN;
break;
case T_DOUBLEFLT: /* double fault */
default:
- i = SIGBUS;
+ signo = SIGBUS;
ucode = BUS_OBJERR;
break;
@@ -315,67 +318,64 @@ trap(struct trapframe *frame)
*/
if (*p->p_sysent->sv_trap != NULL &&
(*p->p_sysent->sv_trap)(td) == 0)
- goto userout;
+ return;
addr = frame->tf_addr;
- i = trap_pfault(frame, TRUE);
- if (i == -1)
- goto userout;
- if (i == 0)
- goto user;
-
- if (i == SIGSEGV)
+ signo = trap_pfault(frame, TRUE);
+ if (signo == -1)
+ return;
+ if (signo == 0)
+ goto userret;
+ if (signo == SIGSEGV) {
ucode = SEGV_MAPERR;
- else {
- if (prot_fault_translation == 0) {
- /*
- * Autodetect.
- * This check also covers the images
- * without the ABI-tag ELF note.
- */
- if (SV_CURPROC_ABI() == SV_ABI_FREEBSD
- && p->p_osrel >= P_OSREL_SIGSEGV) {
- i = SIGSEGV;
- ucode = SEGV_ACCERR;
- } else {
- i = SIGBUS;
- ucode = BUS_PAGE_FAULT;
- }
- } else if (prot_fault_translation == 1) {
- /*
- * Always compat mode.
- */
- i = SIGBUS;
- ucode = BUS_PAGE_FAULT;
- } else {
- /*
- * Always SIGSEGV mode.
- */
- i = SIGSEGV;
+ } else if (prot_fault_translation == 0) {
+ /*
+ * Autodetect. This check also covers
+ * the images without the ABI-tag ELF
+ * note.
+ */
+ if (SV_CURPROC_ABI() == SV_ABI_FREEBSD &&
+ p->p_osrel >= P_OSREL_SIGSEGV) {
+ signo = SIGSEGV;
ucode = SEGV_ACCERR;
+ } else {
+ signo = SIGBUS;
+ ucode = BUS_PAGE_FAULT;
}
+ } else if (prot_fault_translation == 1) {
+ /*
+ * Always compat mode.
+ */
+ signo = SIGBUS;
+ ucode = BUS_PAGE_FAULT;
+ } else {
+ /*
+ * Always SIGSEGV mode.
+ */
+ signo = SIGSEGV;
+ ucode = SEGV_ACCERR;
}
break;
case T_DIVIDE: /* integer divide fault */
ucode = FPE_INTDIV;
- i = SIGFPE;
+ signo = SIGFPE;
break;
#ifdef DEV_ISA
case T_NMI:
nmi_handle_intr(type, frame);
- break;
-#endif /* DEV_ISA */
+ return;
+#endif
case T_OFLOW: /* integer overflow fault */
ucode = FPE_INTOVF;
- i = SIGFPE;
+ signo = SIGFPE;
break;
case T_BOUND: /* bounds check fault */
ucode = FPE_FLTSUB;
- i = SIGFPE;
+ signo = SIGFPE;
break;
case T_DNA:
@@ -383,27 +383,26 @@ trap(struct trapframe *frame)
KASSERT(PCB_USER_FPU(td->td_pcb),
("kernel FPU ctx has leaked"));
fpudna();
- goto userout;
+ return;
case T_FPOPFLT: /* FPU operand fetch fault */
ucode = ILL_COPROC;
- i = SIGILL;
+ signo = SIGILL;
break;
case T_XMMFLT: /* SIMD floating-point exception */
ucode = fputrap_sse();
if (ucode == -1)
- goto userout;
- i = SIGFPE;
+ return;
+ signo = SIGFPE;
break;
#ifdef KDTRACE_HOOKS
case T_DTRACE_RET:
enable_intr();
fill_frame_regs(frame, &regs);
- if (dtrace_return_probe_ptr != NULL &&
- dtrace_return_probe_ptr(&regs) == 0)
- goto out;
- break;
+ if (dtrace_return_probe_ptr != NULL)
+ dtrace_return_probe_ptr(&regs);
+ return;
#endif
}
} else {
@@ -414,13 +413,13 @@ trap(struct trapframe *frame)
switch (type) {
case T_PAGEFLT: /* page fault */
(void) trap_pfault(frame, FALSE);
- goto out;
+ return;
case T_DNA:
if (PCB_USER_FPU(td->td_pcb))
panic("Unregistered use of FPU in kernel");
fpudna();
- goto out;
+ return;
case T_ARITHTRAP: /* arithmetic trap */
case T_XMMFLT: /* SIMD floating-point exception */
@@ -430,7 +429,7 @@ trap(struct trapframe *frame)
* registration for FPU traps is overkill.
*/
trap_fatal(frame, 0);
- goto out;
+ return;
case T_STKFLT: /* stack fault */
case T_PROTFLT: /* general protection fault */
@@ -469,35 +468,35 @@ trap(struct trapframe *frame)
sizeof(register_t))
frame->tf_rsp = PCPU_GET(rsp0) - 5 *
sizeof(register_t);
- goto out;
+ return;
}
if (frame->tf_rip == (long)ld_ds) {
frame->tf_rip = (long)ds_load_fault;
- goto out;
+ return;
}
if (frame->tf_rip == (long)ld_es) {
frame->tf_rip = (long)es_load_fault;
- goto out;
+ return;
}
if (frame->tf_rip == (long)ld_fs) {
frame->tf_rip = (long)fs_load_fault;
- goto out;
+ return;
}
if (frame->tf_rip == (long)ld_gs) {
frame->tf_rip = (long)gs_load_fault;
- goto out;
+ return;
}
if (frame->tf_rip == (long)ld_gsbase) {
frame->tf_rip = (long)gsbase_load_fault;
- goto out;
+ return;
}
if (frame->tf_rip == (long)ld_fsbase) {
frame->tf_rip = (long)fsbase_load_fault;
- goto out;
+ return;
}
if (curpcb->pcb_onfault != NULL) {
frame->tf_rip = (long)curpcb->pcb_onfault;
- goto out;
+ return;
}
break;
@@ -513,7 +512,7 @@ trap(struct trapframe *frame)
*/
if (frame->tf_rflags & PSL_NT) {
frame->tf_rflags &= ~PSL_NT;
- goto out;
+ return;
}
break;
@@ -534,7 +533,7 @@ trap(struct trapframe *frame)
* processor doesn't
*/
load_dr6(rdr6() & ~0xf);
- goto out;
+ return;
}
/*
* FALLTHROUGH (TRCTRAP kernel mode, kernel address)
@@ -549,27 +548,27 @@ trap(struct trapframe *frame)
dr6 = rdr6();
load_dr6(dr6 & ~0x4000);
if (kdb_trap(type, dr6, frame))
- goto out;
+ return;
#endif
break;
#ifdef DEV_ISA
case T_NMI:
nmi_handle_intr(type, frame);
- goto out;
-#endif /* DEV_ISA */
+ return;
+#endif
}
trap_fatal(frame, 0);
- goto out;
+ return;
}
/* Translate fault for emulators (e.g. Linux) */
- if (*p->p_sysent->sv_transtrap)
- i = (*p->p_sysent->sv_transtrap)(i, type);
+ if (*p->p_sysent->sv_transtrap != NULL)
+ signo = (*p->p_sysent->sv_transtrap)(signo, type);
ksiginfo_init_trap(&ksi);
- ksi.ksi_signo = i;
+ ksi.ksi_signo = signo;
ksi.ksi_code = ucode;
ksi.ksi_trapno = type;
ksi.ksi_addr = (void *)addr;
@@ -577,8 +576,8 @@ trap(struct trapframe *frame)
uprintf("pid %d comm %s: signal %d err %lx code %d type %d "
"addr 0x%lx rsp 0x%lx rip 0x%lx "
"<%02x %02x %02x %02x %02x %02x %02x %02x>\n",
- p->p_pid, p->p_comm, i, frame->tf_err, ucode, type, addr,
- frame->tf_rsp, frame->tf_rip,
+ p->p_pid, p->p_comm, signo, frame->tf_err, ucode, type,
+ addr, frame->tf_rsp, frame->tf_rip,
fubyte((void *)(frame->tf_rip + 0)),
fubyte((void *)(frame->tf_rip + 1)),
fubyte((void *)(frame->tf_rip + 2)),
@@ -590,14 +589,10 @@ trap(struct trapframe *frame)
}
KASSERT((read_rflags() & PSL_I) != 0, ("interrupts disabled"));
trapsignal(td, &ksi);
-
-user:
+userret:
userret(td, frame);
KASSERT(PCB_USER_FPU(td->td_pcb),
("Return from trap with kernel FPU ctx leaked"));
-userout:
-out:
- return;
}
/*
@@ -617,17 +612,20 @@ trap_check(struct trapframe *frame)
}
static int
-trap_pfault(frame, usermode)
- struct trapframe *frame;
- int usermode;
+trap_pfault(struct trapframe *frame, int usermode)
{
- vm_offset_t va;
+ struct thread *td;
+ struct proc *p;
vm_map_t map;
- int rv = 0;
+ vm_offset_t va;
+ int rv;
vm_prot_t ftype;
- struct thread *td = curthread;
- struct proc *p = td->td_proc;
- vm_offset_t eva = frame->tf_addr;
+ vm_offset_t eva;
+
+ td = curthread;
+ p = td->td_proc;
+ eva = frame->tf_addr;
+ rv = 0;
if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) {
/*
@@ -854,16 +852,18 @@ dblfault_handler(struct trapframe *frame)
}
int
-cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
+cpu_fetch_syscall_args(struct thread *td)
{
struct proc *p;
struct trapframe *frame;
register_t *argp;
+ struct syscall_args *sa;
caddr_t params;
int reg, regcnt, error;
p = td->td_proc;
frame = td->td_frame;
+ sa = &td->td_sa;
reg = 0;
regcnt = 6;
@@ -914,7 +914,6 @@ cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
void
amd64_syscall(struct thread *td, int traced)
{
- struct syscall_args sa;
int error;
ksiginfo_t ksi;
@@ -924,7 +923,7 @@ amd64_syscall(struct thread *td, int traced)
/* NOT REACHED */
}
#endif
- error = syscallenter(td, &sa);
+ error = syscallenter(td);
/*
* Traced syscall.
@@ -940,15 +939,16 @@ amd64_syscall(struct thread *td, int traced)
KASSERT(PCB_USER_FPU(td->td_pcb),
("System call %s returning with kernel FPU ctx leaked",
- syscallname(td->td_proc, sa.code)));
+ syscallname(td->td_proc, td->td_sa.code)));
KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td),
("System call %s returning with mangled pcb_save",
- syscallname(td->td_proc, sa.code)));
+ syscallname(td->td_proc, td->td_sa.code)));
KASSERT(td->td_md.md_invl_gen.gen == 0,
("System call %s returning with leaked invl_gen %lu",
- syscallname(td->td_proc, sa.code), td->td_md.md_invl_gen.gen));
+ syscallname(td->td_proc, td->td_sa.code),
+ td->td_md.md_invl_gen.gen));
- syscallret(td, error, &sa);
+ syscallret(td, error);
/*
* If the user-supplied value of %rip is not a canonical
@@ -958,6 +958,6 @@ amd64_syscall(struct thread *td, int traced)
* not be safe. Instead, use the full return path which
* catches the problem safely.
*/
- if (td->td_frame->tf_rip >= VM_MAXUSER_ADDRESS)
+ if (__predict_false(td->td_frame->tf_rip >= VM_MAXUSER_ADDRESS))
set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
}
diff --git a/sys/amd64/amd64/uma_machdep.c b/sys/amd64/amd64/uma_machdep.c
index db566ae..220d095 100644
--- a/sys/amd64/amd64/uma_machdep.c
+++ b/sys/amd64/amd64/uma_machdep.c
@@ -46,20 +46,12 @@ uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait)
vm_page_t m;
vm_paddr_t pa;
void *va;
- int pflags;
*flags = UMA_SLAB_PRIV;
- pflags = malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
- for (;;) {
- m = vm_page_alloc(NULL, 0, pflags);
- if (m == NULL) {
- if (wait & M_NOWAIT)
- return (NULL);
- else
- VM_WAIT;
- } else
- break;
- }
+ m = vm_page_alloc(NULL, 0,
+ malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
+ if (m == NULL)
+ return (NULL);
pa = m->phys_addr;
if ((wait & M_NODUMP) == 0)
dump_add_page(pa);
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index 8846eb8..d95bb56 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -176,6 +176,7 @@ cpu_fork(td1, p2, td2, flags)
/* Ensure that td1's pcb is up to date. */
fpuexit(td1);
+ update_pcb_bases(td1->td_pcb);
/* Point the pcb to the top of the stack */
pcb2 = get_pcb_td(td2);
@@ -242,7 +243,7 @@ cpu_fork(td1, p2, td2, flags)
pcb2->pcb_tssp = NULL;
/* New segment registers. */
- set_pcb_flags(pcb2, PCB_FULL_IRET);
+ set_pcb_flags_raw(pcb2, PCB_FULL_IRET);
/* Copy the LDT, if necessary. */
mdp1 = &td1->td_proc->p_md;
@@ -439,13 +440,14 @@ cpu_copy_thread(struct thread *td, struct thread *td0)
* Those not loaded individually below get their default
* values here.
*/
+ update_pcb_bases(td0->td_pcb);
bcopy(td0->td_pcb, pcb2, sizeof(*pcb2));
clear_pcb_flags(pcb2, PCB_FPUINITDONE | PCB_USERFPUINITDONE |
PCB_KERNFPU);
pcb2->pcb_save = get_pcb_user_save_pcb(pcb2);
bcopy(get_pcb_user_save_td(td0), pcb2->pcb_save,
cpu_max_ext_state_size);
- set_pcb_flags(pcb2, PCB_FULL_IRET);
+ set_pcb_flags_raw(pcb2, PCB_FULL_IRET);
/*
* Create a new fresh stack for the new thread.
diff --git a/sys/amd64/cloudabi32/cloudabi32_sysvec.c b/sys/amd64/cloudabi32/cloudabi32_sysvec.c
index abede1a..d5dd58c 100644
--- a/sys/amd64/cloudabi32/cloudabi32_sysvec.c
+++ b/sys/amd64/cloudabi32/cloudabi32_sysvec.c
@@ -90,11 +90,15 @@ cloudabi32_proc_setregs(struct thread *td, struct image_params *imgp,
}
static int
-cloudabi32_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
+cloudabi32_fetch_syscall_args(struct thread *td)
{
- struct trapframe *frame = td->td_frame;
+ struct trapframe *frame;
+ struct syscall_args *sa;
int error;
+ frame = td->td_frame;
+ sa = &td->td_sa;
+
/* Obtain system call number. */
sa->code = frame->tf_rax;
if (sa->code >= CLOUDABI32_SYS_MAXSYSCALL)
diff --git a/sys/amd64/cloudabi64/cloudabi64_sysvec.c b/sys/amd64/cloudabi64/cloudabi64_sysvec.c
index 84f0cb3..679888b 100644
--- a/sys/amd64/cloudabi64/cloudabi64_sysvec.c
+++ b/sys/amd64/cloudabi64/cloudabi64_sysvec.c
@@ -87,9 +87,13 @@ cloudabi64_proc_setregs(struct thread *td, struct image_params *imgp,
}
static int
-cloudabi64_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
+cloudabi64_fetch_syscall_args(struct thread *td)
{
- struct trapframe *frame = td->td_frame;
+ struct trapframe *frame;
+ struct syscall_args *sa;
+
+ frame = td->td_frame;
+ sa = &td->td_sa;
/* Obtain system call number. */
sa->code = frame->tf_rax;
diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC
index de7cbe6..b4eb43d 100644
--- a/sys/amd64/conf/GENERIC
+++ b/sys/amd64/conf/GENERIC
@@ -29,6 +29,7 @@ options PREEMPTION # Enable kernel thread preemption
options INET # InterNETworking
options INET6 # IPv6 communications protocols
options IPSEC # IP (v4/v6) security
+options IPSEC_SUPPORT # Allow kldload of ipsec and tcpmd5
options TCP_OFFLOAD # TCP offload
options SCTP # Stream Control Transmission Protocol
options FFS # Berkeley Fast Filesystem
diff --git a/sys/amd64/conf/pfSense b/sys/amd64/conf/pfSense
new file mode 100644
index 0000000..26c071e
--- /dev/null
+++ b/sys/amd64/conf/pfSense
@@ -0,0 +1,182 @@
+include GENERIC
+
+nooptions KDB_TRACE
+options DDB
+
+ident pfSense
+
+nooptions MAC # TrustedBSD MAC Framework
+nooptions COMPAT_FREEBSD4 # Compatible with FreeBSD4
+nooptions COMPAT_FREEBSD5 # Compatible with FreeBSD5
+nooptions COMPAT_FREEBSD6 # Compatible with FreeBSD6
+nooptions COMPAT_FREEBSD7 # Compatible with FreeBSD7
+
+options GEOM_MIRROR
+options GEOM_UZIP
+options GEOM_ELI
+options GEOM_BDE
+
+options TMPFS
+options UNIONFS
+options NULLFS
+options PPS_SYNC
+
+# Wireless
+#nooptions IEEE80211_DEBUG # enable debug msgs
+device wlan_rssadapt
+device wlan_xauth
+device wlan_acl
+device iwifw
+device ipwfw # Firmware for Intel PRO/Wireless 2100 IEEE 802.11 driver
+device wpifw # Firmware for Intel 3945ABG Wireless LAN IEEE 802.11 driver
+device iwnfw # Firmware for Intel Wireless WiFi Link 4965AGN IEEE 802.11n driver
+device uath # Atheros USB IEEE 802.11a/b/g wireless network device
+device ralfw # Firmware for Ralink Technology RT2500 wireless NICs.
+device ural # Ralink Technology RT2500USB IEEE 802.11 driver
+device urtw # Realtek RTL8187B/L USB IEEE 802.11b/g wireless network device
+device rum # Ralink Technology USB IEEE 802.11a/b/g wireless network device
+device mwlfw # Firmware for Marvell 88W8363 IEEE 802.11n wireless network driver
+device zyd # ZyDAS ZD1211/ZD1211B USB IEEE 802.11b/g wireless network device
+device upgt # Conexant/Intersil PrismGT SoftMAC USB IEEE 802.11b/g wireless
+device udav # Davicom DM9601 USB Ethernet driver
+device axe
+device axge
+device aue
+device cue
+device kue
+device mos
+device rsu
+device rsufw
+device run # Ralink RT2700U/RT2800U/RT3000U USB 802.11agn
+device runfw
+device rue
+device rtwn
+device rtwnfw
+device siba_bwn # Broadcom BCM43xx IEEE 802.11b/g wireless network driver
+device bwn # Broadcom BCM43xx IEEE 802.11b/g wireless network driver
+device bwi # Broadcom BCM43xx IEEE 802.11b/g wireless network driver
+
+# Pseudo devices.
+#device pty # Pseudo-ttys (telnet etc)
+
+# USB support
+nooptions USB_DEBUG # enable debug msgs
+
+# 3G devices
+device ufoma
+device ucom
+device uslcom
+device uplcom
+device umct
+device uvisor
+device uark
+device uftdi
+device uvscom
+device umodem
+device u3g
+device cdce
+
+device uhid # "Human Interface Devices"
+
+# FireWire support
+device firewire # FireWire bus code
+device sbp # SCSI over FireWire (Requires scbus and da)
+
+# pfsense addons
+
+device tap
+device gre
+device if_bridge
+device carp
+device lagg
+device vte
+
+# IP/IPFW
+options IPFIREWALL_DEFAULT_TO_ACCEPT
+options IPFIREWALL_VERBOSE
+options IPSTEALTH
+
+# Netgraph
+options NETGRAPH #netgraph(4) system
+options NETGRAPH_VLAN
+options NETGRAPH_L2TP
+options NETGRAPH_BPF
+options NETGRAPH_ETHER
+options NETGRAPH_IFACE
+options NETGRAPH_EIFACE
+options NETGRAPH_PPP
+options NETGRAPH_PPPOE
+options NETGRAPH_PPTPGRE
+options NETGRAPH_RFC1490
+options NETGRAPH_SOCKET
+options NETGRAPH_TTY
+options NETGRAPH_MPPC_ENCRYPTION
+options NETGRAPH_UI
+options NETGRAPH_VJC
+options NETGRAPH_KSOCKET
+options NETGRAPH_LMI
+options NETGRAPH_ONE2MANY
+options NETGRAPH_BRIDGE
+options NETGRAPH_CISCO
+options NETGRAPH_ECHO
+options NETGRAPH_ASYNC
+options NETGRAPH_FRAME_RELAY
+options NETGRAPH_HOLE
+options NETGRAPH_TEE
+options NETGRAPH_TCPMSS
+options NETGRAPH_PIPE
+options NETGRAPH_CAR
+options NETGRAPH_DEFLATE
+options NETGRAPH_PRED1
+
+# IPSEC
+options TCP_SIGNATURE
+device enc
+
+# ALTQ
+options ALTQ
+options ALTQ_CBQ
+options ALTQ_RED
+options ALTQ_RIO
+options ALTQ_HFSC
+options ALTQ_PRIQ
+options ALTQ_FAIRQ
+options ALTQ_NOPCC
+options ALTQ_CODEL
+
+# Squid related settings
+options MSGMNB=8192 # max # of bytes in a queue
+options MSGMNI=40 # number of message queue identifiers
+options MSGSEG=512 # number of message segments per queue
+options MSGSSZ=32 # size of a message segment
+options MSGTQL=2048 # max messages in system
+
+device pf
+device pflog
+device pfsync
+
+device rndtest # FIPS 140-2 entropy tester
+device hifn # Hifn 7951, 7781, etc.
+options HIFN_DEBUG # enable debugging support: hw.hifn.debug
+options HIFN_RNDTEST # enable rndtest support
+device ubsec # Broadcom 5501, 5601, 58xx
+device safe # safe -- SafeNet crypto accelerator
+device padlock
+
+device speaker
+
+options MROUTING
+
+# Additional cards
+device mxge # mxge - Myricom Myri10GE 10 Gigabit Ethernet adapter driver
+device cxgb # cxgb -- Chelsio T3 10 Gigabit Ethernet adapter driver
+device cxgbe # cxgbe -- Chelsio T5 10 Gigabit Ethernet adapter driver
+#device nve # nVidia nForce MCP on-board Ethernet Networking
+device oce
+
+# Default serial speed
+options CONSPEED=115200
+
+# Enable gpioapu
+#device gpioapu
+#device gpiorcc
diff --git a/sys/amd64/ia32/ia32_syscall.c b/sys/amd64/ia32/ia32_syscall.c
index c2bf2fb..f743440 100644
--- a/sys/amd64/ia32/ia32_syscall.c
+++ b/sys/amd64/ia32/ia32_syscall.c
@@ -106,16 +106,18 @@ ia32_set_syscall_retval(struct thread *td, int error)
}
int
-ia32_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
+ia32_fetch_syscall_args(struct thread *td)
{
struct proc *p;
struct trapframe *frame;
+ struct syscall_args *sa;
caddr_t params;
u_int32_t args[8], tmp;
int error, i;
p = td->td_proc;
frame = td->td_frame;
+ sa = &td->td_sa;
params = (caddr_t)frame->tf_rsp + sizeof(u_int32_t);
sa->code = frame->tf_rax;
@@ -176,7 +178,6 @@ void
ia32_syscall(struct trapframe *frame)
{
struct thread *td;
- struct syscall_args sa;
register_t orig_tf_rflags;
int error;
ksiginfo_t ksi;
@@ -185,7 +186,7 @@ ia32_syscall(struct trapframe *frame)
td = curthread;
td->td_frame = frame;
- error = syscallenter(td, &sa);
+ error = syscallenter(td);
/*
* Traced syscall.
@@ -199,7 +200,7 @@ ia32_syscall(struct trapframe *frame)
trapsignal(td, &ksi);
}
- syscallret(td, error, &sa);
+ syscallret(td, error);
}
static void
diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h
index 5fa0d77..2107b2ba 100644
--- a/sys/amd64/include/cpufunc.h
+++ b/sys/amd64/include/cpufunc.h
@@ -651,6 +651,38 @@ load_gs(u_short sel)
}
#endif
+static __inline uint64_t
+rdfsbase(void)
+{
+ uint64_t x;
+ /* RDFSBASE: read the current FS segment base into x and return it. */
+ __asm __volatile("rdfsbase %0" : "=r" (x));
+ return (x);
+}
+
+static __inline void
+wrfsbase(uint64_t x)
+{
+ /* WRFSBASE: load x as the new FS segment base. */
+ __asm __volatile("wrfsbase %0" : : "r" (x));
+}
+
+static __inline uint64_t
+rdgsbase(void)
+{
+ uint64_t x;
+ /* RDGSBASE: read the current GS segment base into x and return it. */
+ __asm __volatile("rdgsbase %0" : "=r" (x));
+ return (x);
+}
+
+static __inline void
+wrgsbase(uint64_t x)
+{
+ /* WRGSBASE: load x as the new GS segment base. */
+ __asm __volatile("wrgsbase %0" : : "r" (x));
+}
+
static __inline void
bare_lgdt(struct region_descriptor *addr)
{
diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h
index b81f497..f4d6e60 100644
--- a/sys/amd64/include/md_var.h
+++ b/sys/amd64/include/md_var.h
@@ -34,7 +34,8 @@
#include <x86/x86_var.h>
-extern uint64_t *vm_page_dump;
+extern uint64_t *vm_page_dump;
+extern int hw_lower_amd64_sharedpage;
extern int hw_ibrs_disable;
/*
@@ -44,9 +45,11 @@ extern int hw_ibrs_disable;
extern char kernphys[];
struct savefpu;
+struct sysentvec;
void amd64_conf_fast_syscall(void);
void amd64_db_resume_dbreg(void);
+void amd64_lower_shared_page(struct sysentvec *);
void amd64_syscall(struct thread *td, int traced);
void doreti_iret(void) __asm(__STRING(doreti_iret));
void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault));
diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h
index 2b7bb6e..09aea36 100644
--- a/sys/amd64/include/pcb.h
+++ b/sys/amd64/include/pcb.h
@@ -119,40 +119,15 @@ struct susppcb {
#ifdef _KERNEL
struct trapframe;
-/*
- * The pcb_flags is only modified by current thread, or by other threads
- * when current thread is stopped. However, current thread may change it
- * from the interrupt context in cpu_switch(), or in the trap handler.
- * When we read-modify-write pcb_flags from C sources, compiler may generate
- * code that is not atomic regarding the interrupt handler. If a trap or
- * interrupt happens and any flag is modified from the handler, it can be
- * clobbered with the cached value later. Therefore, we implement setting
- * and clearing flags with single-instruction functions, which do not race
- * with possible modification of the flags from the trap or interrupt context,
- * because traps and interrupts are executed only on instruction boundary.
- */
-static __inline void
-set_pcb_flags(struct pcb *pcb, const u_int flags)
-{
-
- __asm __volatile("orl %1,%0"
- : "=m" (pcb->pcb_flags) : "ir" (flags), "m" (pcb->pcb_flags)
- : "cc");
-}
-
-static __inline void
-clear_pcb_flags(struct pcb *pcb, const u_int flags)
-{
-
- __asm __volatile("andl %1,%0"
- : "=m" (pcb->pcb_flags) : "ir" (~flags), "m" (pcb->pcb_flags)
- : "cc");
-}
-
+void clear_pcb_flags(struct pcb *pcb, const u_int flags);
void makectx(struct trapframe *, struct pcb *);
+void set_pcb_flags(struct pcb *pcb, const u_int flags);
+void set_pcb_flags_raw(struct pcb *pcb, const u_int flags);
int savectx(struct pcb *) __returns_twice;
void resumectx(struct pcb *);
+/* Bring pcb_fsbase and pcb_gsbase up to date by setting PCB_FULL_IRET */
+#define update_pcb_bases(pcb) set_pcb_flags((pcb), PCB_FULL_IRET)
#endif
#endif /* _AMD64_PCB_H_ */
diff --git a/sys/amd64/include/proc.h b/sys/amd64/include/proc.h
index f4b59aa..4c2b244 100644
--- a/sys/amd64/include/proc.h
+++ b/sys/amd64/include/proc.h
@@ -70,6 +70,13 @@ struct mdproc {
#define KINFO_PROC_SIZE 1088
#define KINFO_PROC32_SIZE 768
+struct syscall_args {
+ u_int code; /* system call number, e.g. taken from tf_rax */
+ struct sysent *callp; /* presumably the sysent entry for code -- confirm */
+ register_t args[8]; /* argument values, e.g. loaded from the trapframe */
+ int narg; /* presumably the number of valid args[] -- confirm */
+};
+
#ifdef _KERNEL
/* Get the current kernel thread stack usage. */
@@ -92,13 +99,6 @@ int amd64_set_ldt_data(struct thread *td, int start, int num,
extern struct mtx dt_lock;
extern int max_ldt_segment;
-
-struct syscall_args {
- u_int code;
- struct sysent *callp;
- register_t args[8];
- int narg;
-};
#endif /* _KERNEL */
#endif /* !_MACHINE_PROC_H_ */
diff --git a/sys/amd64/linux/linux_sysvec.c b/sys/amd64/linux/linux_sysvec.c
index 6e12d41..942819b 100644
--- a/sys/amd64/linux/linux_sysvec.c
+++ b/sys/amd64/linux/linux_sysvec.c
@@ -126,7 +126,7 @@ static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
static void linux_vdso_install(void *param);
static void linux_vdso_deinstall(void *param);
static void linux_set_syscall_retval(struct thread *td, int error);
-static int linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa);
+static int linux_fetch_syscall_args(struct thread *td);
static void linux_exec_setregs(struct thread *td, struct image_params *imgp,
u_long stack);
static int linux_vsyscall(struct thread *td);
@@ -217,13 +217,15 @@ translate_traps(int signal, int trap_code)
}
static int
-linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
+linux_fetch_syscall_args(struct thread *td)
{
struct proc *p;
struct trapframe *frame;
+ struct syscall_args *sa;
p = td->td_proc;
frame = td->td_frame;
+ sa = &td->td_sa;
sa->args[0] = frame->tf_rdi;
sa->args[1] = frame->tf_rsi;
@@ -831,6 +833,8 @@ static void
linux_vdso_install(void *param)
{
+ amd64_lower_shared_page(&elf_linux_sysvec);
+
linux_szsigcode = (&_binary_linux_locore_o_end -
&_binary_linux_locore_o_start);
diff --git a/sys/amd64/linux32/linux32_sysvec.c b/sys/amd64/linux32/linux32_sysvec.c
index c264f3a..ea849ba 100644
--- a/sys/amd64/linux32/linux32_sysvec.c
+++ b/sys/amd64/linux32/linux32_sysvec.c
@@ -725,13 +725,15 @@ linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
}
static int
-linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
+linux32_fetch_syscall_args(struct thread *td)
{
struct proc *p;
struct trapframe *frame;
+ struct syscall_args *sa;
p = td->td_proc;
frame = td->td_frame;
+ sa = &td->td_sa;
sa->args[0] = frame->tf_rbx;
sa->args[1] = frame->tf_rcx;
diff --git a/sys/amd64/vmm/intel/vmx_msr.c b/sys/amd64/vmm/intel/vmx_msr.c
index 91b2c01..a48cb76 100644
--- a/sys/amd64/vmm/intel/vmx_msr.c
+++ b/sys/amd64/vmm/intel/vmx_msr.c
@@ -31,10 +31,12 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/proc.h>
#include <machine/clock.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
+#include <machine/pcb.h>
#include <machine/specialreg.h>
#include <machine/vmm.h>
@@ -356,7 +358,8 @@ vmx_msr_guest_enter(struct vmx *vmx, int vcpuid)
{
uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
- /* Save host MSRs (if any) and restore guest MSRs */
+ /* Save host MSRs (in particular, KGSBASE) and restore guest MSRs */
+ update_pcb_bases(curpcb);
wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]);
wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]);
wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]);
OpenPOWER on IntegriCloud