summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorkib <kib@FreeBSD.org>2009-07-09 09:34:11 +0000
committerkib <kib@FreeBSD.org>2009-07-09 09:34:11 +0000
commit1596f53aae0639b74921f046885b0fe1b57766b4 (patch)
tree72e1f854630487c503e885bb3cc1df1eeb575521
parent2706e21f2c2e081ec96cde8bfe4e71021535961e (diff)
downloadFreeBSD-src-1596f53aae0639b74921f046885b0fe1b57766b4.zip
FreeBSD-src-1596f53aae0639b74921f046885b0fe1b57766b4.tar.gz
Restore the segment registers and segment base MSRs for amd64 syscall
return path only when the thread was context switched while executing syscall code or the syscall explicitly modified the LDT or MSRs. Save segment registers in trap handlers before interrupts are enabled, so that a context switch cannot happen before the registers are saved. Use a separate byte in the pcb to indicate fast/full return, since pcb_flags are not synchronized with context switches. The change restores the syscall microbenchmark numbers that had slowed down after the commit of LDT support on amd64. Reviewed by: jeff Tested (and tested, and tested ...) by: pho Approved by: re (kensmith)
-rw-r--r--sys/amd64/amd64/cpu_switch.S1
-rw-r--r--sys/amd64/amd64/exception.S28
-rw-r--r--sys/amd64/amd64/genassym.c1
-rw-r--r--sys/amd64/amd64/machdep.c4
-rw-r--r--sys/amd64/amd64/sys_machdep.c8
-rw-r--r--sys/amd64/amd64/vm_machdep.c5
-rw-r--r--sys/amd64/ia32/ia32_exception.S12
-rw-r--r--sys/amd64/ia32/ia32_reg.c1
-rw-r--r--sys/amd64/ia32/ia32_signal.c7
-rw-r--r--sys/amd64/include/pcb.h3
-rw-r--r--sys/amd64/linux32/linux32_sysvec.c5
11 files changed, 60 insertions, 15 deletions
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
index 6fc8290..364875e 100644
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -97,6 +97,7 @@ END(cpu_throw)
ENTRY(cpu_switch)
/* Switch to new thread. First, save context. */
movq TD_PCB(%rdi),%r8
+ movb $1,PCB_FULL_IRET(%r8)
movq (%rsp),%rax /* Hardware registers */
movq %r15,PCB_R15(%r8)
diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S
index daa5c25..d78e234 100644
--- a/sys/amd64/amd64/exception.S
+++ b/sys/amd64/amd64/exception.S
@@ -162,19 +162,20 @@ IDTVEC(align)
.globl alltraps
.type alltraps,@function
alltraps:
+ movq %rdi,TF_RDI(%rsp)
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
jz alltraps_testi /* already running with kernel GS.base */
swapgs
+ movq PCPU(CURPCB),%rdi
+ movb $0,PCB_FULL_IRET(%rdi)
movw %fs,TF_FS(%rsp)
movw %gs,TF_GS(%rsp)
movw %es,TF_ES(%rsp)
movw %ds,TF_DS(%rsp)
alltraps_testi:
testl $PSL_I,TF_RFLAGS(%rsp)
- jz alltraps_pushregs
+ jz alltraps_pushregs_no_rdi
sti
-alltraps_pushregs:
- movq %rdi,TF_RDI(%rsp)
alltraps_pushregs_no_rdi:
movq %rsi,TF_RSI(%rsp)
movq %rdx,TF_RDX(%rsp)
@@ -233,14 +234,17 @@ calltrap:
.globl alltraps_noen
.type alltraps_noen,@function
alltraps_noen:
+ movq %rdi,TF_RDI(%rsp)
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
jz 1f /* already running with kernel GS.base */
swapgs
+ movq PCPU(CURPCB),%rdi
+ movb $0,PCB_FULL_IRET(%rdi)
1: movw %fs,TF_FS(%rsp)
movw %gs,TF_GS(%rsp)
movw %es,TF_ES(%rsp)
movw %ds,TF_DS(%rsp)
- jmp alltraps_pushregs
+ jmp alltraps_pushregs_no_rdi
IDTVEC(dblfault)
subq $TF_ERR,%rsp
@@ -278,12 +282,13 @@ IDTVEC(dblfault)
IDTVEC(page)
subq $TF_ERR,%rsp
movl $T_PAGEFLT,TF_TRAPNO(%rsp)
+ movq %rdi,TF_RDI(%rsp) /* free up a GP register */
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
jz 1f /* already running with kernel GS.base */
swapgs
-1:
- movq %rdi,TF_RDI(%rsp) /* free up a GP register */
- movq %cr2,%rdi /* preserve %cr2 before .. */
+ movq PCPU(CURPCB),%rdi
+ movb $0,PCB_FULL_IRET(%rdi)
+1: movq %cr2,%rdi /* preserve %cr2 before .. */
movq %rdi,TF_ADDR(%rsp) /* enabling interrupts. */
movw %fs,TF_FS(%rsp)
movw %gs,TF_GS(%rsp)
@@ -311,7 +316,9 @@ IDTVEC(prot)
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
jz 2f /* already running with kernel GS.base */
1: swapgs
-2: movw %fs,TF_FS(%rsp)
+2: movq PCPU(CURPCB),%rdi
+ movb $1,PCB_FULL_IRET(%rdi) /* always full iret from GPF */
+ movw %fs,TF_FS(%rsp)
movw %gs,TF_GS(%rsp)
movw %es,TF_ES(%rsp)
movw %ds,TF_DS(%rsp)
@@ -341,6 +348,8 @@ IDTVEC(fast_syscall)
movw %gs,TF_GS(%rsp)
movw %es,TF_ES(%rsp)
movw %ds,TF_DS(%rsp)
+ movq PCPU(CURPCB),%r11
+ movb $0,PCB_FULL_IRET(%r11)
sti
movq $KUDSEL,TF_SS(%rsp)
movq $KUCSEL,TF_CS(%rsp)
@@ -644,7 +653,8 @@ doreti_exit:
*/
testb $SEL_RPL_MASK,TF_CS(%rsp)
jz ld_regs
-
+ cmpb $0,PCB_FULL_IRET(%r8)
+ je ld_regs
testl $TF_HASSEGS,TF_FLAGS(%rsp)
je set_segs
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index e25f2f9..9d78309 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -141,6 +141,7 @@ ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3));
ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6));
ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
ASSYM(PCB_TSSP, offsetof(struct pcb, pcb_tssp));
+ASSYM(PCB_FULL_IRET, offsetof(struct pcb, pcb_full_iret));
ASSYM(PCB_DBREGS, PCB_DBREGS);
ASSYM(PCB_32BIT, PCB_32BIT);
ASSYM(PCB_GS32BIT, PCB_GS32BIT);
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 654e12b..41e7a03 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -382,6 +382,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
regs->tf_fs = _ufssel;
regs->tf_gs = _ugssel;
regs->tf_flags = TF_HASSEGS;
+ td->td_pcb->pcb_full_iret = 1;
PROC_LOCK(p);
mtx_lock(&psp->ps_mtx);
}
@@ -483,6 +484,7 @@ sigreturn(td, uap)
signotify(td);
PROC_UNLOCK(p);
td->td_pcb->pcb_flags |= PCB_FULLCTX;
+ td->td_pcb->pcb_full_iret = 1;
return (EJUSTRETURN);
}
@@ -853,6 +855,7 @@ exec_setregs(td, entry, stack, ps_strings)
pcb->pcb_gsbase = 0;
pcb->pcb_flags &= ~(PCB_32BIT | PCB_GS32BIT);
pcb->pcb_initial_fpucw = __INITIAL_FPUCW__;
+ pcb->pcb_full_iret = 1;
bzero((char *)regs, sizeof(struct trapframe));
regs->tf_rip = entry;
@@ -2031,6 +2034,7 @@ set_mcontext(struct thread *td, const mcontext_t *mcp)
td->td_pcb->pcb_gsbase = mcp->mc_gsbase;
}
td->td_pcb->pcb_flags |= PCB_FULLCTX;
+ td->td_pcb->pcb_full_iret = 1;
return (0);
}
diff --git a/sys/amd64/amd64/sys_machdep.c b/sys/amd64/amd64/sys_machdep.c
index c01ead2..1cba8a2 100644
--- a/sys/amd64/amd64/sys_machdep.c
+++ b/sys/amd64/amd64/sys_machdep.c
@@ -103,6 +103,7 @@ sysarch_ldt(struct thread *td, struct sysarch_args *uap, int uap_space)
error = amd64_get_ldt(td, largs);
break;
case I386_SET_LDT:
+ td->td_pcb->pcb_full_iret = 1;
if (largs->descs != NULL) {
lp = (struct user_segment_descriptor *)
kmem_alloc(kernel_map, largs->num *
@@ -132,6 +133,7 @@ update_gdt_gsbase(struct thread *td, uint32_t base)
if (td != curthread)
return;
+ td->td_pcb->pcb_full_iret = 1;
critical_enter();
sd = PCPU_GET(gs32p);
sd->sd_lobase = base & 0xffffff;
@@ -146,6 +148,7 @@ update_gdt_fsbase(struct thread *td, uint32_t base)
if (td != curthread)
return;
+ td->td_pcb->pcb_full_iret = 1;
critical_enter();
sd = PCPU_GET(fs32p);
sd->sd_lobase = base & 0xffffff;
@@ -201,6 +204,7 @@ sysarch(td, uap)
if (!error) {
pcb->pcb_fsbase = i386base;
td->td_frame->tf_fs = _ufssel;
+ pcb->pcb_full_iret = 1;
update_gdt_fsbase(td, i386base);
}
break;
@@ -212,6 +216,7 @@ sysarch(td, uap)
error = copyin(uap->parms, &i386base, sizeof(i386base));
if (!error) {
pcb->pcb_gsbase = i386base;
+ pcb->pcb_full_iret = 1;
td->td_frame->tf_gs = _ugssel;
update_gdt_gsbase(td, i386base);
}
@@ -225,6 +230,7 @@ sysarch(td, uap)
if (!error) {
if (a64base < VM_MAXUSER_ADDRESS) {
pcb->pcb_fsbase = a64base;
+ pcb->pcb_full_iret = 1;
td->td_frame->tf_fs = _ufssel;
} else
error = EINVAL;
@@ -240,6 +246,7 @@ sysarch(td, uap)
if (!error) {
if (a64base < VM_MAXUSER_ADDRESS) {
pcb->pcb_gsbase = a64base;
+ pcb->pcb_full_iret = 1;
td->td_frame->tf_gs = _ugssel;
} else
error = EINVAL;
@@ -525,6 +532,7 @@ amd64_set_ldt(td, uap, descs)
uap->start, uap->num, (void *)uap->descs);
#endif
+ td->td_pcb->pcb_full_iret = 1;
p = td->td_proc;
if (descs == NULL) {
/* Free descriptors */
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index 928be34..51d1d62 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -186,6 +186,9 @@ cpu_fork(td1, p2, td2, flags)
/* As an i386, do not copy io permission bitmap. */
pcb2->pcb_tssp = NULL;
+ /* New segment registers. */
+ pcb2->pcb_full_iret = 1;
+
/* Copy the LDT, if necessary. */
mdp1 = &td1->td_proc->p_md;
mdp2 = &p2->p_md;
@@ -336,6 +339,7 @@ cpu_set_upcall(struct thread *td, struct thread *td0)
*/
bcopy(td0->td_pcb, pcb2, sizeof(*pcb2));
pcb2->pcb_flags &= ~PCB_FPUINITDONE;
+ pcb2->pcb_full_iret = 1;
/*
* Create a new fresh stack for the new thread.
@@ -450,6 +454,7 @@ cpu_set_user_tls(struct thread *td, void *tls_base)
}
#endif
td->td_pcb->pcb_fsbase = (register_t)tls_base;
+ td->td_pcb->pcb_full_iret = 1;
return (0);
}
diff --git a/sys/amd64/ia32/ia32_exception.S b/sys/amd64/ia32/ia32_exception.S
index 76c5d5a..341f00e 100644
--- a/sys/amd64/ia32/ia32_exception.S
+++ b/sys/amd64/ia32/ia32_exception.S
@@ -42,10 +42,16 @@
SUPERALIGN_TEXT
IDTVEC(int0x80_syscall)
swapgs
- sti
pushq $2 /* sizeof "int 0x80" */
subq $TF_ERR,%rsp /* skip over tf_trapno */
movq %rdi,TF_RDI(%rsp)
+ movq PCPU(CURPCB),%rdi
+ movb $0,PCB_FULL_IRET(%rdi)
+ movw %fs,TF_FS(%rsp)
+ movw %gs,TF_GS(%rsp)
+ movw %es,TF_ES(%rsp)
+ movw %ds,TF_DS(%rsp)
+ sti
movq %rsi,TF_RSI(%rsp)
movq %rdx,TF_RDX(%rsp)
movq %rcx,TF_RCX(%rsp)
@@ -60,10 +66,6 @@ IDTVEC(int0x80_syscall)
movq %r13,TF_R13(%rsp)
movq %r14,TF_R14(%rsp)
movq %r15,TF_R15(%rsp)
- movw %fs,TF_FS(%rsp)
- movw %gs,TF_GS(%rsp)
- movw %es,TF_ES(%rsp)
- movw %ds,TF_DS(%rsp)
movl $TF_HASSEGS,TF_FLAGS(%rsp)
FAKE_MCOUNT(TF_RIP(%rsp))
movq %rsp, %rdi
diff --git a/sys/amd64/ia32/ia32_reg.c b/sys/amd64/ia32/ia32_reg.c
index 49dd4e2..83f6783 100644
--- a/sys/amd64/ia32/ia32_reg.c
+++ b/sys/amd64/ia32/ia32_reg.c
@@ -125,6 +125,7 @@ set_regs32(struct thread *td, struct reg32 *regs)
tp->tf_fs = regs->r_fs;
tp->tf_es = regs->r_es;
tp->tf_ds = regs->r_ds;
+ td->td_pcb->pcb_full_iret = 1;
tp->tf_flags = TF_HASSEGS;
tp->tf_rdi = regs->r_edi;
tp->tf_rsi = regs->r_esi;
diff --git a/sys/amd64/ia32/ia32_signal.c b/sys/amd64/ia32/ia32_signal.c
index 37e8013..d7c1dd5 100644
--- a/sys/amd64/ia32/ia32_signal.c
+++ b/sys/amd64/ia32/ia32_signal.c
@@ -159,6 +159,7 @@ ia32_get_mcontext(struct thread *td, struct ia32_mcontext *mcp, int flags)
ia32_get_fpcontext(td, mcp);
mcp->mc_fsbase = td->td_pcb->pcb_fsbase;
mcp->mc_gsbase = td->td_pcb->pcb_gsbase;
+ td->td_pcb->pcb_full_iret = 1;
return (0);
}
@@ -201,6 +202,7 @@ ia32_set_mcontext(struct thread *td, const struct ia32_mcontext *mcp)
tp->tf_rsp = mcp->mc_esp;
tp->tf_ss = mcp->mc_ss;
td->td_pcb->pcb_flags |= PCB_FULLCTX;
+ td->td_pcb->pcb_full_iret = 1;
return (0);
}
@@ -394,6 +396,7 @@ freebsd4_ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
regs->tf_ss = _udatasel;
regs->tf_ds = _udatasel;
regs->tf_es = _udatasel;
+ td->td_pcb->pcb_full_iret = 1;
/* leave user %fs and %gs untouched */
PROC_LOCK(p);
mtx_lock(&psp->ps_mtx);
@@ -514,6 +517,7 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
regs->tf_ss = _udatasel;
regs->tf_ds = _udatasel;
regs->tf_es = _udatasel;
+ td->td_pcb->pcb_full_iret = 1;
/* XXXKIB leave user %fs and %gs untouched */
PROC_LOCK(p);
mtx_lock(&psp->ps_mtx);
@@ -611,6 +615,7 @@ freebsd4_freebsd32_sigreturn(td, uap)
SIG_CANTMASK(td->td_sigmask);
signotify(td);
PROC_UNLOCK(p);
+ td->td_pcb->pcb_full_iret = 1;
return (EJUSTRETURN);
}
#endif /* COMPAT_FREEBSD4 */
@@ -702,6 +707,7 @@ freebsd32_sigreturn(td, uap)
SIG_CANTMASK(td->td_sigmask);
signotify(td);
PROC_UNLOCK(p);
+ td->td_pcb->pcb_full_iret = 1;
return (EJUSTRETURN);
}
@@ -747,5 +753,6 @@ ia32_setregs(td, entry, stack, ps_strings)
/* Return via doreti so that we can change to a different %cs */
pcb->pcb_flags |= PCB_FULLCTX | PCB_32BIT;
pcb->pcb_flags &= ~PCB_GS32BIT;
+ td->td_pcb->pcb_full_iret = 1;
td->td_retval[1] = 0;
}
diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h
index 7361049..b26188a 100644
--- a/sys/amd64/include/pcb.h
+++ b/sys/amd64/include/pcb.h
@@ -72,12 +72,13 @@ struct pcb {
struct savefpu pcb_save;
uint16_t pcb_initial_fpucw;
- caddr_t pcb_onfault; /* copyin/out fault recovery */
+ caddr_t pcb_onfault; /* copyin/out fault recovery */
/* 32-bit segment descriptor */
struct user_segment_descriptor pcb_gs32sd;
/* local tss, with i/o bitmap; NULL for common */
struct amd64tss *pcb_tssp;
+ char pcb_full_iret;
};
struct xpcb {
diff --git a/sys/amd64/linux32/linux32_sysvec.c b/sys/amd64/linux32/linux32_sysvec.c
index 2626ccf..77186a1 100644
--- a/sys/amd64/linux32/linux32_sysvec.c
+++ b/sys/amd64/linux32/linux32_sysvec.c
@@ -423,6 +423,7 @@ linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
regs->tf_fs = _ufssel;
regs->tf_gs = _ugssel;
regs->tf_flags = TF_HASSEGS;
+ td->td_pcb->pcb_full_iret = 1;
PROC_LOCK(p);
mtx_lock(&psp->ps_mtx);
}
@@ -545,6 +546,7 @@ linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
regs->tf_fs = _ufssel;
regs->tf_gs = _ugssel;
regs->tf_flags = TF_HASSEGS;
+ td->td_pcb->pcb_full_iret = 1;
PROC_LOCK(p);
mtx_lock(&psp->ps_mtx);
}
@@ -645,6 +647,7 @@ linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
regs->tf_rflags = eflags;
regs->tf_rsp = frame.sf_sc.sc_esp_at_signal;
regs->tf_ss = frame.sf_sc.sc_ss;
+ td->td_pcb->pcb_full_iret = 1;
return (EJUSTRETURN);
}
@@ -746,6 +749,7 @@ linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
regs->tf_rflags = eflags;
regs->tf_rsp = context->sc_esp_at_signal;
regs->tf_ss = context->sc_ss;
+ td->td_pcb->pcb_full_iret = 1;
/*
* call sigaltstack & ignore results..
@@ -864,6 +868,7 @@ exec_linux_setregs(td, entry, stack, ps_strings)
regs->tf_flags = TF_HASSEGS;
regs->tf_cs = _ucode32sel;
regs->tf_rbx = ps_strings;
+ td->td_pcb->pcb_full_iret = 1;
load_cr0(rcr0() | CR0_MP | CR0_TS);
fpstate_drop(td);
OpenPOWER on IntegriCloud